/* drivers/gpu/drm/i915/intel_pm.c */
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using as little as 0V while in this stage.
 * This stage is entered automatically when the GPU is idle and RC6
 * support is enabled, and as soon as a new workload arises the GPU wakes
 * up automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in
 * the latency required to enter and leave RC6, and in the voltage
 * consumed by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is deep RC6, and
 * RC6pp is the deepest RC6. Hardware support for them varies according
 * to the GPU, BIOS, chipset and platform. RC6 is usually the safest
 * state and the one which brings the most power savings; deeper states
 * save more power, but require higher latency to enter and to wake up from.
 */
#define INTEL_RC6_ENABLE                        (1<<0)
#define INTEL_RC6p_ENABLE                       (1<<1)
#define INTEL_RC6pp_ENABLE                      (1<<2)
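
/*
 * Illustrative example (not from Bspec): a mode mask of
 * (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE) would permit the normal and
 * deep RC6 states while keeping the deepest state (RC6pp) disabled.
 */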

static void gen9_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
        I915_WRITE(CHICKEN_PAR1_1,
                   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

        I915_WRITE(GEN8_CONFIG0,
                   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);

        /* WaEnableChickenDCPR:skl,bxt,kbl */
        I915_WRITE(GEN8_CHICKEN_DCPR_1,
                   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

        /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
        /* WaFbcWakeMemOn:skl,bxt,kbl */
        I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
                   DISP_FBC_WM_DIS |
                   DISP_FBC_MEMORY_WAKE);

        /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
        I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
                   ILK_DPFC_DISABLE_DUMMY0);
}

static void bxt_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        gen9_init_clock_gating(dev);

        /* WaDisableSDEUnitClockGating:bxt */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

        /*
         * FIXME:
         * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
         */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

        /*
         * Wa: Backlight PWM may stop in the asserted state, causing backlight
         * to stay fully on.
         */
        if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
                I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                           PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u16 ddrpll, csipll;

        ddrpll = I915_READ16(DDRMPLL1);
        csipll = I915_READ16(CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
                                 ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        dev_priv->ips.r_t = dev_priv->mem_freq;

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
                                 csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }

        if (dev_priv->fsb_freq == 3200) {
                dev_priv->ips.c_m = 0;
        } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
                dev_priv->ips.c_m = 1;
        } else {
                dev_priv->ips.c_m = 2;
        }
}

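/*
 * Each entry below is {is_desktop, is_ddr3, fsb_freq, mem_freq} followed
 * by four latencies: display/cursor self-refresh and display/cursor
 * HPLL-off self-refresh (field order assumed from struct cxsr_latency
 * and its use in pineview_update_wm() below).
 */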
static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
                                                         int is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
        if (enable)
                val &= ~FORCE_DDR_HIGH_FREQ;
        else
                val |= FORCE_DDR_HIGH_FREQ;
        val &= ~FORCE_DDR_LOW_FREQ;
        val |= FORCE_DDR_FREQ_REQ_ACK;
        vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

        if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
                      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
                DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

        mutex_unlock(&dev_priv->rps.hw_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
        if (enable)
                val |= DSP_MAXFIFO_PM5_ENABLE;
        else
                val &= ~DSP_MAXFIFO_PM5_ENABLE;
        vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

        mutex_unlock(&dev_priv->rps.hw_lock);
}

#define FW_WM(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
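/*
 * For example, FW_WM(wm, SR) shifts 'wm' into the DSPFW_SR field of the
 * watermark register and masks off any bits that would spill outside the
 * field, so oversized values are silently truncated rather than
 * corrupting neighbouring fields.
 */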

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        struct drm_device *dev = dev_priv->dev;
        u32 val;

        if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
                POSTING_READ(FW_BLC_SELF_VLV);
                dev_priv->wm.vlv.cxsr = enable;
        } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_PINEVIEW(dev)) {
                val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
                val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
                I915_WRITE(DSPFW3, val);
                POSTING_READ(DSPFW3);
        } else if (IS_I945G(dev) || IS_I945GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_I915GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
                POSTING_READ(INSTPM);
        } else {
                return;
        }

        DRM_DEBUG_KMS("memory self-refresh is %s\n",
                      enable ? "enabled" : "disabled");
}

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (if we set it too low to save power, we might
 * see FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
        ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
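/*
 * For example, VLV_FIFO_START(dsparb, dsparb2, 8, 4) recovers a 9-bit
 * FIFO start offset: bits 15:8 of DSPARB form the low eight bits and
 * bit 4 of DSPARB2 supplies bit 8.
 */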

static int vlv_get_fifo_size(struct drm_device *dev,
                              enum pipe pipe, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int sprite0_start, sprite1_start, size;

        switch (pipe) {
                uint32_t dsparb, dsparb2, dsparb3;
        case PIPE_A:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
                break;
        case PIPE_B:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
                break;
        case PIPE_C:
                dsparb2 = I915_READ(DSPARB2);
                dsparb3 = I915_READ(DSPARB3);
                sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
                sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
                break;
        default:
                return 0;
        }

        switch (plane) {
        case 0:
                size = sprite0_start;
                break;
        case 1:
                size = sprite1_start - sprite0_start;
                break;
        case 2:
                size = 512 - 1 - sprite1_start;
                break;
        default:
                return 0;
        }

        DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
                      pipe_name(pipe), plane == 0 ? "primary" : "sprite",
                      plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
                      size);

        return size;
}

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        if (plane)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (plane)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A",
                      size);

        return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
        .fifo_size = G4X_FIFO_SIZE,
        .max_wm = G4X_MAX_WM,
        .default_wm = G4X_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO, in cachelines
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO line-sized chunks from memory until the FIFO fills
 * past the watermark point.  If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
                                        const struct intel_watermark_params *wm,
                                        int fifo_size, int cpp,
                                        unsigned long latency_ns)
{
        long entries_required, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * clocks go from a few thousand to several hundred thousand.
         * latency is usually a few thousand
         */
        entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) /
                1000;
        entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

        DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

        wm_size = fifo_size - (entries_required + wm->guard_size);

        DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > (long)wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;

        /*
         * Bspec seems to indicate that the value shouldn't be lower than
         * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
         * Let's go for 8, which is the burst size, since certain platforms
         * already use a hardcoded 8 (which is what the spec says should be
         * done).
         */
        if (wm_size <= 8)
                wm_size = 8;

        return wm_size;
}
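
/*
 * Worked example with illustrative numbers (not taken from any Bspec
 * table): clock_in_khz = 148500, cpp = 4, latency_ns = 5000 and a
 * 64-byte cacheline give entries_required = (148 * 4 * 5000) / 1000 =
 * 2960 bytes -> DIV_ROUND_UP(2960, 64) = 47 cachelines.  With a
 * hypothetical fifo_size of 512 and guard_size of 2, the watermark comes
 * out as 512 - (47 + 2) = 463, subject to the max_wm clamp above.
 */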

static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
        struct drm_crtc *crtc, *enabled = NULL;

        for_each_crtc(dev, crtc) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned long wm;

        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
                                         dev_priv->fsb_freq, dev_priv->mem_freq);
        if (!latency) {
                DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
                intel_set_memory_cxsr(dev_priv, false);
                return;
        }

        crtc = single_enabled_crtc(dev);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
                int clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
                                        pineview_display_wm.fifo_size,
                                        cpp, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= FW_WM(wm, SR);
                I915_WRITE(DSPFW1, reg);
                DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
                                        cpp, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= FW_WM(wm, CURSOR_SR);
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        cpp, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= FW_WM(wm, HPLL_SR);
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        cpp, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= FW_WM(wm, HPLL_CURSOR);
                I915_WRITE(DSPFW3, reg);
                DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

                intel_set_memory_cxsr(dev_priv, true);
        } else {
                intel_set_memory_cxsr(dev_priv, false);
        }
}

static bool g4x_compute_wm0(struct drm_device *dev,
                            int plane,
                            const struct intel_watermark_params *display,
                            int display_latency_ns,
                            const struct intel_watermark_params *cursor,
                            int cursor_latency_ns,
                            int *plane_wm,
                            int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int htotal, hdisplay, clock, cpp;
        int line_time_us, line_count;
        int entries, tlb_miss;

        crtc = intel_get_crtc_for_plane(dev, plane);
        if (!intel_crtc_active(crtc)) {
                *cursor_wm = cursor->guard_size;
                *plane_wm = display->guard_size;
                return false;
        }

        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

        /* Use the small buffer method to calculate plane watermark */
        entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
        tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, display->cacheline_size);
        *plane_wm = entries + display->guard_size;
        if (*plane_wm > (int)display->max_wm)
                *plane_wm = display->max_wm;

        /* Use the large buffer method to calculate cursor watermark */
        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
        entries = line_count * crtc->cursor->state->crtc_w * cpp;
        tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;
        if (*cursor_wm > (int)cursor->max_wm)
                *cursor_wm = (int)cursor->max_wm;

        return true;
}
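
/*
 * Small-buffer method, with illustrative numbers: at clock = 108000 kHz
 * and cpp = 4 the plane consumes 108000 * 4 / 1000 = 432 bytes/us, so a
 * 5000 ns latency drains 432 * 5 = 2160 bytes, i.e.
 * DIV_ROUND_UP(2160, 64) = 34 cachelines before any TLB-miss adjustment;
 * adding a guard_size of 2 would then yield a plane watermark of 36.
 */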

/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
                           int display_wm, int cursor_wm,
                           const struct intel_watermark_params *display,
                           const struct intel_watermark_params *cursor)
{
        DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
                      display_wm, cursor_wm);

        if (display_wm > display->max_wm) {
                DRM_DEBUG_KMS("display watermark is too large (%d/%ld), disabling\n",
                              display_wm, display->max_wm);
                return false;
        }

        if (cursor_wm > cursor->max_wm) {
                DRM_DEBUG_KMS("cursor watermark is too large (%d/%ld), disabling\n",
                              cursor_wm, cursor->max_wm);
                return false;
        }

        if (!(display_wm || cursor_wm)) {
                DRM_DEBUG_KMS("SR latency is 0, disabling\n");
                return false;
        }

        return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
                             int plane,
                             int latency_ns,
                             const struct intel_watermark_params *display,
                             const struct intel_watermark_params *cursor,
                             int *display_wm, int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int hdisplay, htotal, cpp, clock;
        unsigned long line_time_us;
        int line_count, line_size;
        int small, large;
        int entries;

        if (!latency_ns) {
                *display_wm = *cursor_wm = 0;
                return false;
        }

        crtc = intel_get_crtc_for_plane(dev, plane);
        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (latency_ns / line_time_us + 1000) / 1000;
        line_size = hdisplay * cpp;

        /* Use the minimum of the small and large buffer method for primary */
        small = ((clock * cpp / 1000) * latency_ns) / 1000;
        large = line_count * line_size;

        entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
        *display_wm = entries + display->guard_size;

        /* calculate the self-refresh watermark for display cursor */
        entries = line_count * cpp * crtc->cursor->state->crtc_w;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;

        return g4x_check_srwm(dev,
                              *display_wm, *cursor_wm,
                              display, cursor);
}

#define FW_WM_VLV(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct intel_crtc *crtc,
                                const struct vlv_wm_values *wm)
{
        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
        enum pipe pipe = crtc->pipe;

        I915_WRITE(VLV_DDL(pipe),
                   (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
                   (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
                   (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
                   (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
                   FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
                   FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
        I915_WRITE(DSPFW2,
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
        I915_WRITE(DSPFW3,
                   FW_WM(wm->sr.cursor, CURSOR_SR));

        if (IS_CHERRYVIEW(dev_priv)) {
                I915_WRITE(DSPFW7_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPFW8_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
                I915_WRITE(DSPFW9_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
                           FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
                           FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        } else {
                I915_WRITE(DSPFW7,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        }

        /* zero (unused) WM1 watermarks */
        I915_WRITE(DSPFW4, 0);
        I915_WRITE(DSPFW5, 0);
        I915_WRITE(DSPFW6, 0);
        I915_WRITE(DSPHOWM1, 0);

        POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

enum vlv_wm_level {
        VLV_WM_LEVEL_PM2,
        VLV_WM_LEVEL_PM5,
        VLV_WM_LEVEL_DDR_DVFS,
};

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
                                   unsigned int pipe_htotal,
                                   unsigned int horiz_pixels,
                                   unsigned int cpp,
                                   unsigned int latency)
{
        unsigned int ret;

        ret = (latency * pixel_rate) / (pipe_htotal * 10000);
        ret = (ret + 1) * horiz_pixels * cpp;
        ret = DIV_ROUND_UP(ret, 64);

        return ret;
}
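
/*
 * Worked example with illustrative numbers: pixel_rate = 148500 (kHz),
 * pipe_htotal = 2200 and latency = 30 (3.0 us in 0.1us units) give
 * (30 * 148500) / (2200 * 10000) = 0 whole lines of latency, so one
 * extra line is fetched: (0 + 1) * 1920 * 4 = 7680 bytes, or
 * DIV_ROUND_UP(7680, 64) = 120 FIFO entries for a 1920-pixel-wide,
 * 4-byte-per-pixel plane.
 */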

static void vlv_setup_wm_latency(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* all latencies in usec */
        dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

        dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

        if (IS_CHERRYVIEW(dev_priv)) {
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

                dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
        }
}

static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
                                     struct intel_crtc *crtc,
                                     const struct intel_plane_state *state,
                                     int level)
{
        struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
        int clock, htotal, cpp, width, wm;

        if (dev_priv->wm.pri_latency[level] == 0)
                return USHRT_MAX;

        if (!state->visible)
                return 0;

        cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
        clock = crtc->config->base.adjusted_mode.crtc_clock;
        htotal = crtc->config->base.adjusted_mode.crtc_htotal;
        width = crtc->config->pipe_src_w;
        if (WARN_ON(htotal == 0))
                htotal = 1;

        if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                /*
                 * FIXME the formula gives values that are
                 * too big for the cursor FIFO, and hence we
                 * would never be able to use cursors. For
                 * now just hardcode the watermark.
                 */
                wm = 63;
        } else {
                wm = vlv_wm_method2(clock, htotal, width, cpp,
                                    dev_priv->wm.pri_latency[level] * 10);
        }

        return min_t(int, wm, USHRT_MAX);
}

static void vlv_compute_fifo(struct intel_crtc *crtc)
{
        struct drm_device *dev = crtc->base.dev;
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        struct intel_plane *plane;
        unsigned int total_rate = 0;
        const int fifo_size = 512 - 1;
        int fifo_extra, fifo_left = fifo_size;

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                if (state->visible) {
                        wm_state->num_active_planes++;
                        total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                }
        }

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);
                unsigned int rate;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                        plane->wm.fifo_size = 63;
                        continue;
                }

                if (!state->visible) {
                        plane->wm.fifo_size = 0;
                        continue;
                }

                rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                plane->wm.fifo_size = fifo_size * rate / total_rate;
                fifo_left -= plane->wm.fifo_size;
        }

        fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);

        /* spread the remainder evenly */
        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                int plane_extra;

                if (fifo_left == 0)
                        break;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                /* give it all to the first plane if none are active */
                if (plane->wm.fifo_size == 0 &&
                    wm_state->num_active_planes)
                        continue;

                plane_extra = min(fifo_extra, fifo_left);
                plane->wm.fifo_size += plane_extra;
                fifo_left -= plane_extra;
        }

        WARN_ON(fifo_left != 0);
}
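
/*
 * Example of the proportional split above (illustrative numbers): with
 * a visible 4-cpp primary and a visible 2-cpp sprite, total_rate = 6,
 * so the primary gets 511 * 4 / 6 = 340 entries and the sprite
 * 511 * 2 / 6 = 170; the single leftover entry is then handed out by
 * the remainder-spreading loop, giving a final 341/170 split of the
 * 511-entry FIFO.
 */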
1054
1055 static void vlv_invert_wms(struct intel_crtc *crtc)
1056 {
1057         struct vlv_wm_state *wm_state = &crtc->wm_state;
1058         int level;
1059
1060         for (level = 0; level < wm_state->num_levels; level++) {
1061                 struct drm_device *dev = crtc->base.dev;
1062                 const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1063                 struct intel_plane *plane;
1064
1065                 wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
1066                 wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
1067
1068                 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1069                         switch (plane->base.type) {
1070                                 int sprite;
1071                         case DRM_PLANE_TYPE_CURSOR:
1072                                 wm_state->wm[level].cursor = plane->wm.fifo_size -
1073                                         wm_state->wm[level].cursor;
1074                                 break;
1075                         case DRM_PLANE_TYPE_PRIMARY:
1076                                 wm_state->wm[level].primary = plane->wm.fifo_size -
1077                                         wm_state->wm[level].primary;
1078                                 break;
1079                         case DRM_PLANE_TYPE_OVERLAY:
1080                                 sprite = plane->plane;
1081                                 wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
1082                                         wm_state->wm[level].sprite[sprite];
1083                                 break;
1084                         }
1085                 }
1086         }
1087 }
1088
1089 static void vlv_compute_wm(struct intel_crtc *crtc)
1090 {
1091         struct drm_device *dev = crtc->base.dev;
1092         struct vlv_wm_state *wm_state = &crtc->wm_state;
1093         struct intel_plane *plane;
1094         int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1095         int level;
1096
1097         memset(wm_state, 0, sizeof(*wm_state));
1098
1099         wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
1100         wm_state->num_levels = to_i915(dev)->wm.max_level + 1;
1101
1102         wm_state->num_active_planes = 0;
1103
1104         vlv_compute_fifo(crtc);
1105
1106         if (wm_state->num_active_planes != 1)
1107                 wm_state->cxsr = false;
1108
1109         if (wm_state->cxsr) {
1110                 for (level = 0; level < wm_state->num_levels; level++) {
1111                         wm_state->sr[level].plane = sr_fifo_size;
1112                         wm_state->sr[level].cursor = 63;
1113                 }
1114         }
1115
1116         for_each_intel_plane_on_crtc(dev, crtc, plane) {
1117                 struct intel_plane_state *state =
1118                         to_intel_plane_state(plane->base.state);
1119
1120                 if (!state->visible)
1121                         continue;
1122
1123                 /* normal watermarks */
1124                 for (level = 0; level < wm_state->num_levels; level++) {
1125                         int wm = vlv_compute_wm_level(plane, crtc, state, level);
1126                         int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
1127
1128                         /* hack */
1129                         if (WARN_ON(level == 0 && wm > max_wm))
1130                                 wm = max_wm;
1131
1132                         if (wm > plane->wm.fifo_size)
1133                                 break;
1134
1135                         switch (plane->base.type) {
1136                                 int sprite;
1137                         case DRM_PLANE_TYPE_CURSOR:
1138                                 wm_state->wm[level].cursor = wm;
1139                                 break;
1140                         case DRM_PLANE_TYPE_PRIMARY:
1141                                 wm_state->wm[level].primary = wm;
1142                                 break;
1143                         case DRM_PLANE_TYPE_OVERLAY:
1144                                 sprite = plane->plane;
1145                                 wm_state->wm[level].sprite[sprite] = wm;
1146                                 break;
1147                         }
1148                 }
1149
1150                 wm_state->num_levels = level;
1151
1152                 if (!wm_state->cxsr)
1153                         continue;
1154
1155                 /* maxfifo watermarks */
1156                 switch (plane->base.type) {
1157                         int sprite, level;
1158                 case DRM_PLANE_TYPE_CURSOR:
1159                         for (level = 0; level < wm_state->num_levels; level++)
1160                                 wm_state->sr[level].cursor =
1161                                         wm_state->wm[level].cursor;
1162                         break;
1163                 case DRM_PLANE_TYPE_PRIMARY:
1164                         for (level = 0; level < wm_state->num_levels; level++)
1165                                 wm_state->sr[level].plane =
1166                                         min(wm_state->sr[level].plane,
1167                                             wm_state->wm[level].primary);
1168                         break;
1169                 case DRM_PLANE_TYPE_OVERLAY:
1170                         sprite = plane->plane;
1171                         for (level = 0; level < wm_state->num_levels; level++)
1172                                 wm_state->sr[level].plane =
1173                                         min(wm_state->sr[level].plane,
1174                                             wm_state->wm[level].sprite[sprite]);
1175                         break;
1176                 }
1177         }
1178
1179         /* clear any (partially) filled invalid levels */
1180         for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
1181                 memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
1182                 memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
1183         }
1184
1185         vlv_invert_wms(crtc);
1186 }
1187
1188 #define VLV_FIFO(plane, value) \
1189         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1190
1191 static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
1192 {
1193         struct drm_device *dev = crtc->base.dev;
1194         struct drm_i915_private *dev_priv = to_i915(dev);
1195         struct intel_plane *plane;
1196         int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;
1197
1198         for_each_intel_plane_on_crtc(dev, crtc, plane) {
1199                 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1200                         WARN_ON(plane->wm.fifo_size != 63);
1201                         continue;
1202                 }
1203
1204                 if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
1205                         sprite0_start = plane->wm.fifo_size;
1206                 else if (plane->plane == 0)
1207                         sprite1_start = sprite0_start + plane->wm.fifo_size;
1208                 else
1209                         fifo_size = sprite1_start + plane->wm.fifo_size;
1210         }
1211
1212         WARN_ON(fifo_size != 512 - 1);
1213
1214         DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
1215                       pipe_name(crtc->pipe), sprite0_start,
1216                       sprite1_start, fifo_size);
1217
1218         switch (crtc->pipe) {
1219                 uint32_t dsparb, dsparb2, dsparb3;
1220         case PIPE_A:
1221                 dsparb = I915_READ(DSPARB);
1222                 dsparb2 = I915_READ(DSPARB2);
1223
1224                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1225                             VLV_FIFO(SPRITEB, 0xff));
1226                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1227                            VLV_FIFO(SPRITEB, sprite1_start));
1228
1229                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1230                              VLV_FIFO(SPRITEB_HI, 0x1));
1231                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1232                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1233
1234                 I915_WRITE(DSPARB, dsparb);
1235                 I915_WRITE(DSPARB2, dsparb2);
1236                 break;
1237         case PIPE_B:
1238                 dsparb = I915_READ(DSPARB);
1239                 dsparb2 = I915_READ(DSPARB2);
1240
1241                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1242                             VLV_FIFO(SPRITED, 0xff));
1243                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1244                            VLV_FIFO(SPRITED, sprite1_start));
1245
1246                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1247                              VLV_FIFO(SPRITED_HI, 0xff));
1248                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1249                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
1250
1251                 I915_WRITE(DSPARB, dsparb);
1252                 I915_WRITE(DSPARB2, dsparb2);
1253                 break;
1254         case PIPE_C:
1255                 dsparb3 = I915_READ(DSPARB3);
1256                 dsparb2 = I915_READ(DSPARB2);
1257
1258                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
1259                              VLV_FIFO(SPRITEF, 0xff));
1260                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
1261                             VLV_FIFO(SPRITEF, sprite1_start));
1262
1263                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
1264                              VLV_FIFO(SPRITEF_HI, 0xff));
1265                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
1266                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
1267
1268                 I915_WRITE(DSPARB3, dsparb3);
1269                 I915_WRITE(DSPARB2, dsparb2);
1270                 break;
1271         default:
1272                 break;
1273         }
1274 }
1275
1276 #undef VLV_FIFO
1277
1278 static void vlv_merge_wm(struct drm_device *dev,
1279                          struct vlv_wm_values *wm)
1280 {
1281         struct intel_crtc *crtc;
1282         int num_active_crtcs = 0;
1283
1284         wm->level = to_i915(dev)->wm.max_level;
1285         wm->cxsr = true;
1286
1287         for_each_intel_crtc(dev, crtc) {
1288                 const struct vlv_wm_state *wm_state = &crtc->wm_state;
1289
1290                 if (!crtc->active)
1291                         continue;
1292
1293                 if (!wm_state->cxsr)
1294                         wm->cxsr = false;
1295
1296                 num_active_crtcs++;
1297                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
1298         }
1299
1300         if (num_active_crtcs != 1)
1301                 wm->cxsr = false;
1302
1303         if (num_active_crtcs > 1)
1304                 wm->level = VLV_WM_LEVEL_PM2;
1305
1306         for_each_intel_crtc(dev, crtc) {
1307                 struct vlv_wm_state *wm_state = &crtc->wm_state;
1308                 enum pipe pipe = crtc->pipe;
1309
1310                 if (!crtc->active)
1311                         continue;
1312
1313                 wm->pipe[pipe] = wm_state->wm[wm->level];
1314                 if (wm->cxsr)
1315                         wm->sr = wm_state->sr[wm->level];
1316
1317                 wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
1318                 wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
1319                 wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
1320                 wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
1321         }
1322 }
1323
1324 static void vlv_update_wm(struct drm_crtc *crtc)
1325 {
1326         struct drm_device *dev = crtc->dev;
1327         struct drm_i915_private *dev_priv = dev->dev_private;
1328         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1329         enum pipe pipe = intel_crtc->pipe;
1330         struct vlv_wm_values wm = {};
1331
1332         vlv_compute_wm(intel_crtc);
1333         vlv_merge_wm(dev, &wm);
1334
1335         if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
1336                 /* FIXME should be part of crtc atomic commit */
1337                 vlv_pipe_set_fifo_size(intel_crtc);
1338                 return;
1339         }
1340
1341         if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
1342             dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
1343                 chv_set_memory_dvfs(dev_priv, false);
1344
1345         if (wm.level < VLV_WM_LEVEL_PM5 &&
1346             dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
1347                 chv_set_memory_pm5(dev_priv, false);
1348
1349         if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
1350                 intel_set_memory_cxsr(dev_priv, false);
1351
1352         /* FIXME should be part of crtc atomic commit */
1353         vlv_pipe_set_fifo_size(intel_crtc);
1354
1355         vlv_write_wm_values(intel_crtc, &wm);
1356
1357         DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
1358                       "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
1359                       pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
1360                       wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
1361                       wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
1362
1363         if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
1364                 intel_set_memory_cxsr(dev_priv, true);
1365
1366         if (wm.level >= VLV_WM_LEVEL_PM5 &&
1367             dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
1368                 chv_set_memory_pm5(dev_priv, true);
1369
1370         if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
1371             dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
1372                 chv_set_memory_dvfs(dev_priv, true);
1373
1374         dev_priv->wm.vlv = wm;
1375 }
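
/*
 * Note on the ordering in vlv_update_wm() (editor's summary, no new
 * behavior): the deeper memory states (DDR DVFS, PM5, cxsr) are dropped
 * before the new FIFO split and watermark values are written, and only
 * (re)enabled afterwards, so the hardware never sits in a deep
 * self-refresh state while the shallower watermarks are being updated.
 */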
1376
1377 #define single_plane_enabled(mask) is_power_of_2(mask)
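
/*
 * is_power_of_2() is true only for a non-zero mask with exactly one bit
 * set, so e.g. single_plane_enabled(1 << PIPE_A) is true while
 * single_plane_enabled((1 << PIPE_A) | (1 << PIPE_B)) and
 * single_plane_enabled(0) are false.
 */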
1378
1379 static void g4x_update_wm(struct drm_crtc *crtc)
1380 {
1381         struct drm_device *dev = crtc->dev;
1382         static const int sr_latency_ns = 12000;
1383         struct drm_i915_private *dev_priv = dev->dev_private;
1384         int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1385         int plane_sr, cursor_sr;
1386         unsigned int enabled = 0;
1387         bool cxsr_enabled;
1388
1389         if (g4x_compute_wm0(dev, PIPE_A,
1390                             &g4x_wm_info, pessimal_latency_ns,
1391                             &g4x_cursor_wm_info, pessimal_latency_ns,
1392                             &planea_wm, &cursora_wm))
1393                 enabled |= 1 << PIPE_A;
1394
1395         if (g4x_compute_wm0(dev, PIPE_B,
1396                             &g4x_wm_info, pessimal_latency_ns,
1397                             &g4x_cursor_wm_info, pessimal_latency_ns,
1398                             &planeb_wm, &cursorb_wm))
1399                 enabled |= 1 << PIPE_B;
1400
1401         if (single_plane_enabled(enabled) &&
1402             g4x_compute_srwm(dev, ffs(enabled) - 1,
1403                              sr_latency_ns,
1404                              &g4x_wm_info,
1405                              &g4x_cursor_wm_info,
1406                              &plane_sr, &cursor_sr)) {
1407                 cxsr_enabled = true;
1408         } else {
1409                 cxsr_enabled = false;
1410                 intel_set_memory_cxsr(dev_priv, false);
1411                 plane_sr = cursor_sr = 0;
1412         }
1413
1414         DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1415                       "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1416                       planea_wm, cursora_wm,
1417                       planeb_wm, cursorb_wm,
1418                       plane_sr, cursor_sr);
1419
1420         I915_WRITE(DSPFW1,
1421                    FW_WM(plane_sr, SR) |
1422                    FW_WM(cursorb_wm, CURSORB) |
1423                    FW_WM(planeb_wm, PLANEB) |
1424                    FW_WM(planea_wm, PLANEA));
1425         I915_WRITE(DSPFW2,
1426                    (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1427                    FW_WM(cursora_wm, CURSORA));
1428         /* HPLL off in SR has some issues on G4x... disable it */
1429         I915_WRITE(DSPFW3,
1430                    (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1431                    FW_WM(cursor_sr, CURSOR_SR));
1432
1433         if (cxsr_enabled)
1434                 intel_set_memory_cxsr(dev_priv, true);
1435 }
1436
1437 static void i965_update_wm(struct drm_crtc *unused_crtc)
1438 {
1439         struct drm_device *dev = unused_crtc->dev;
1440         struct drm_i915_private *dev_priv = dev->dev_private;
1441         struct drm_crtc *crtc;
1442         int srwm = 1;
1443         int cursor_sr = 16;
1444         bool cxsr_enabled;
1445
1446         /* Calculate SR entries for single-plane configs */
1447         crtc = single_enabled_crtc(dev);
1448         if (crtc) {
1449                 /* self-refresh has much higher latency */
1450                 static const int sr_latency_ns = 12000;
1451                 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1452                 int clock = adjusted_mode->crtc_clock;
1453                 int htotal = adjusted_mode->crtc_htotal;
1454                 int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
1455                 int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
1456                 unsigned long line_time_us;
1457                 int entries;
1458
1459                 line_time_us = max(htotal * 1000 / clock, 1);
1460
1461                 /* Use ns/us then divide to preserve precision */
1462                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1463                         cpp * hdisplay;
1464                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1465                 srwm = I965_FIFO_SIZE - entries;
1466                 if (srwm < 0)
1467                         srwm = 1;
1468                 srwm &= 0x1ff;
1469                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1470                               entries, srwm);
1471
1472                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1473                         cpp * crtc->cursor->state->crtc_w;
1474                 entries = DIV_ROUND_UP(entries,
1475                                           i965_cursor_wm_info.cacheline_size);
1476                 cursor_sr = i965_cursor_wm_info.fifo_size -
1477                         (entries + i965_cursor_wm_info.guard_size);
1478
1479                 if (cursor_sr > i965_cursor_wm_info.max_wm)
1480                         cursor_sr = i965_cursor_wm_info.max_wm;
1481
1482                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1483                               "cursor %d\n", srwm, cursor_sr);
1484
1485                 cxsr_enabled = true;
1486         } else {
1487                 cxsr_enabled = false;
1488                 /* Turn off self refresh if both pipes are enabled */
1489                 intel_set_memory_cxsr(dev_priv, false);
1490         }
1491
1492         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1493                       srwm);
1494
1495         /* 965 has limitations... */
1496         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
1497                    FW_WM(8, CURSORB) |
1498                    FW_WM(8, PLANEB) |
1499                    FW_WM(8, PLANEA));
1500         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
1501                    FW_WM(8, PLANEC_OLD));
1502         /* update cursor SR watermark */
1503         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
1504
1505         if (cxsr_enabled)
1506                 intel_set_memory_cxsr(dev_priv, true);
1507 }
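
/*
 * Worked example of the SR entries math above (illustrative mode:
 * 1920x1080 @ 148.5 MHz, htotal 2200, cpp 4; assumes the usual
 * I915_FIFO_LINE_SIZE of 64 and I965_FIFO_SIZE of 512):
 *   line_time_us = max(2200 * 1000 / 148500, 1)        = 14
 *   entries = ((12000 / 14 + 1000) / 1000) * 4 * 1920  = 7680
 *   entries = DIV_ROUND_UP(7680, 64)                   = 120
 *   srwm    = 512 - 120                                = 392
 */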
1508
1509 #undef FW_WM
1510
1511 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1512 {
1513         struct drm_device *dev = unused_crtc->dev;
1514         struct drm_i915_private *dev_priv = dev->dev_private;
1515         const struct intel_watermark_params *wm_info;
1516         uint32_t fwater_lo;
1517         uint32_t fwater_hi;
1518         int cwm, srwm = 1;
1519         int fifo_size;
1520         int planea_wm, planeb_wm;
1521         struct drm_crtc *crtc, *enabled = NULL;
1522
1523         if (IS_I945GM(dev))
1524                 wm_info = &i945_wm_info;
1525         else if (!IS_GEN2(dev))
1526                 wm_info = &i915_wm_info;
1527         else
1528                 wm_info = &i830_a_wm_info;
1529
1530         fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1531         crtc = intel_get_crtc_for_plane(dev, 0);
1532         if (intel_crtc_active(crtc)) {
1533                 const struct drm_display_mode *adjusted_mode;
1534                 int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
1535                 if (IS_GEN2(dev))
1536                         cpp = 4;
1537
1538                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1539                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1540                                                wm_info, fifo_size, cpp,
1541                                                pessimal_latency_ns);
1542                 enabled = crtc;
1543         } else {
1544                 planea_wm = fifo_size - wm_info->guard_size;
1545                 if (planea_wm > (long)wm_info->max_wm)
1546                         planea_wm = wm_info->max_wm;
1547         }
1548
1549         if (IS_GEN2(dev))
1550                 wm_info = &i830_bc_wm_info;
1551
1552         fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1553         crtc = intel_get_crtc_for_plane(dev, 1);
1554         if (intel_crtc_active(crtc)) {
1555                 const struct drm_display_mode *adjusted_mode;
1556                 int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
1557                 if (IS_GEN2(dev))
1558                         cpp = 4;
1559
1560                 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1561                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1562                                                wm_info, fifo_size, cpp,
1563                                                pessimal_latency_ns);
1564                 if (enabled == NULL)
1565                         enabled = crtc;
1566                 else
1567                         enabled = NULL;
1568         } else {
1569                 planeb_wm = fifo_size - wm_info->guard_size;
1570                 if (planeb_wm > (long)wm_info->max_wm)
1571                         planeb_wm = wm_info->max_wm;
1572         }
1573
1574         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1575
1576         if (IS_I915GM(dev) && enabled) {
1577                 struct drm_i915_gem_object *obj;
1578
1579                 obj = intel_fb_obj(enabled->primary->state->fb);
1580
1581                 /* self-refresh seems busted with untiled */
1582                 if (obj->tiling_mode == I915_TILING_NONE)
1583                         enabled = NULL;
1584         }
1585
1586         /*
1587          * Overlay gets an aggressive default since video jitter is bad.
1588          */
1589         cwm = 2;
1590
1591         /* Play safe and disable self-refresh before adjusting watermarks. */
1592         intel_set_memory_cxsr(dev_priv, false);
1593
1594         /* Calculate SR entries for single-plane configs */
1595         if (HAS_FW_BLC(dev) && enabled) {
1596                 /* self-refresh has much higher latency */
1597                 static const int sr_latency_ns = 6000;
1598                 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
1599                 int clock = adjusted_mode->crtc_clock;
1600                 int htotal = adjusted_mode->crtc_htotal;
1601                 int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
1602                 int cpp = drm_format_plane_cpp(enabled->primary->state->fb->pixel_format, 0);
1603                 unsigned long line_time_us;
1604                 int entries;
1605
1606                 line_time_us = max(htotal * 1000 / clock, 1);
1607
1608                 /* Use ns/us then divide to preserve precision */
1609                 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1610                         cpp * hdisplay;
1611                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1612                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1613                 srwm = wm_info->fifo_size - entries;
1614                 if (srwm < 0)
1615                         srwm = 1;
1616
1617                 if (IS_I945G(dev) || IS_I945GM(dev))
1618                         I915_WRITE(FW_BLC_SELF,
1619                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1620                 else if (IS_I915GM(dev))
1621                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1622         }
1623
1624         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1625                       planea_wm, planeb_wm, cwm, srwm);
1626
1627         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1628         fwater_hi = (cwm & 0x1f);
1629
1630         /* Set request length to 8 cachelines per fetch */
1631         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1632         fwater_hi = fwater_hi | (1 << 8);
1633
1634         I915_WRITE(FW_BLC, fwater_lo);
1635         I915_WRITE(FW_BLC2, fwater_hi);
1636
1637         if (enabled)
1638                 intel_set_memory_cxsr(dev_priv, true);
1639 }
1640
1641 static void i845_update_wm(struct drm_crtc *unused_crtc)
1642 {
1643         struct drm_device *dev = unused_crtc->dev;
1644         struct drm_i915_private *dev_priv = dev->dev_private;
1645         struct drm_crtc *crtc;
1646         const struct drm_display_mode *adjusted_mode;
1647         uint32_t fwater_lo;
1648         int planea_wm;
1649
1650         crtc = single_enabled_crtc(dev);
1651         if (crtc == NULL)
1652                 return;
1653
1654         adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1655         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1656                                        &i845_wm_info,
1657                                        dev_priv->display.get_fifo_size(dev, 0),
1658                                        4, pessimal_latency_ns);
1659         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1660         fwater_lo |= (3<<8) | planea_wm;
1661
1662         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1663
1664         I915_WRITE(FW_BLC, fwater_lo);
1665 }
1666
1667 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
1668 {
1669         uint32_t pixel_rate;
1670
1671         pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
1672
1673         /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1674          * adjust the pixel_rate here. */
1675
1676         if (pipe_config->pch_pfit.enabled) {
1677                 uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1678                 uint32_t pfit_size = pipe_config->pch_pfit.size;
1679
1680                 pipe_w = pipe_config->pipe_src_w;
1681                 pipe_h = pipe_config->pipe_src_h;
1682
1683                 pfit_w = (pfit_size >> 16) & 0xFFFF;
1684                 pfit_h = pfit_size & 0xFFFF;
1685                 if (pipe_w < pfit_w)
1686                         pipe_w = pfit_w;
1687                 if (pipe_h < pfit_h)
1688                         pipe_h = pfit_h;
1689
1690                 if (WARN_ON(!pfit_w || !pfit_h))
1691                         return pixel_rate;
1692
1693                 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1694                                      pfit_w * pfit_h);
1695         }
1696
1697         return pixel_rate;
1698 }
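
/*
 * Example of the pfit adjustment above (illustrative numbers): a
 * 1920x1080 source downscaled by the panel fitter to 1280x720
 * (pfit_size = 1280 << 16 | 720) scales the rate by
 * (1920 * 1080) / (1280 * 720) = 2.25, so a 100000 kHz crtc_clock
 * becomes a 225000 kHz pixel rate for watermark purposes.
 */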
1699
1700 /* latency must be in 0.1us units. */
1701 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
1702 {
1703         uint64_t ret;
1704
1705         if (WARN(latency == 0, "Latency value missing\n"))
1706                 return UINT_MAX;
1707
1708         ret = (uint64_t) pixel_rate * cpp * latency;
1709         ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1710
1711         return ret;
1712 }
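
/*
 * Worked example (illustrative): pixel_rate = 148500 kHz, cpp = 4,
 * latency = 120 (i.e. 12 us in 0.1 us units):
 *   ret = 148500 * 4 * 120 = 71280000
 *   ret = DIV_ROUND_UP_ULL(71280000, 64 * 10000) + 2 = 112 + 2 = 114
 * i.e. the bytes fetched during the latency window, expressed in
 * 64 byte FIFO lines plus a guard of two.
 */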
1713
1714 /* latency must be in 0.1us units. */
1715 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1716                                uint32_t horiz_pixels, uint8_t cpp,
1717                                uint32_t latency)
1718 {
1719         uint32_t ret;
1720
1721         if (WARN(latency == 0, "Latency value missing\n"))
1722                 return UINT_MAX;
1723         if (WARN_ON(!pipe_htotal))
1724                 return UINT_MAX;
1725
1726         ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1727         ret = (ret + 1) * horiz_pixels * cpp;
1728         ret = DIV_ROUND_UP(ret, 64) + 2;
1729         return ret;
1730 }
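
/*
 * Worked example (illustrative): latency = 120 (12 us), pixel_rate =
 * 148500 kHz, htotal = 2200, 1920 visible pixels, cpp = 4:
 *   lines = (120 * 148500) / (2200 * 10000) = 0    (12 us < one line)
 *   ret   = (0 + 1) * 1920 * 4 = 7680
 *   ret   = DIV_ROUND_UP(7680, 64) + 2 = 122
 * i.e. at least one full line's worth of FIFO lines is charged even
 * for sub-line latencies.
 */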
1731
1732 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1733                            uint8_t cpp)
1734 {
1735         /*
1736          * Neither of these should be possible since this function shouldn't be
1737          * called if the CRTC is off or the plane is invisible.  But let's be
1738          * extra paranoid to avoid a potential divide-by-zero if we screw up
1739          * elsewhere in the driver.
1740          */
1741         if (WARN_ON(!cpp))
1742                 return 0;
1743         if (WARN_ON(!horiz_pixels))
1744                 return 0;
1745
1746         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
1747 }
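
/*
 * Example (continuing the illustrative numbers above): pri_val = 114,
 * 1920 horizontal pixels, cpp = 4:
 *   DIV_ROUND_UP(114 * 64, 1920 * 4) + 2 = 1 + 2 = 3
 * i.e. the primary watermark converted from FIFO lines back into
 * display lines, plus a guard of two.
 */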
1748
1749 struct ilk_wm_maximums {
1750         uint16_t pri;
1751         uint16_t spr;
1752         uint16_t cur;
1753         uint16_t fbc;
1754 };
1755
1756 /*
1757  * For both WM_PIPE and WM_LP.
1758  * mem_value must be in 0.1us units.
1759  */
1760 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
1761                                    const struct intel_plane_state *pstate,
1762                                    uint32_t mem_value,
1763                                    bool is_lp)
1764 {
1765         int cpp = pstate->base.fb ?
1766                 drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
1767         uint32_t method1, method2;
1768
1769         if (!cstate->base.active || !pstate->visible)
1770                 return 0;
1771
1772         method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);
1773
1774         if (!is_lp)
1775                 return method1;
1776
1777         method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1778                                  cstate->base.adjusted_mode.crtc_htotal,
1779                                  drm_rect_width(&pstate->dst),
1780                                  cpp, mem_value);
1781
1782         return min(method1, method2);
1783 }
1784
1785 /*
1786  * For both WM_PIPE and WM_LP.
1787  * mem_value must be in 0.1us units.
1788  */
1789 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
1790                                    const struct intel_plane_state *pstate,
1791                                    uint32_t mem_value)
1792 {
1793         int cpp = pstate->base.fb ?
1794                 drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
1795         uint32_t method1, method2;
1796
1797         if (!cstate->base.active || !pstate->visible)
1798                 return 0;
1799
1800         method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);
1801         method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1802                                  cstate->base.adjusted_mode.crtc_htotal,
1803                                  drm_rect_width(&pstate->dst),
1804                                  cpp, mem_value);
1805         return min(method1, method2);
1806 }
1807
1808 /*
1809  * For both WM_PIPE and WM_LP.
1810  * mem_value must be in 0.1us units.
1811  */
1812 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
1813                                    const struct intel_plane_state *pstate,
1814                                    uint32_t mem_value)
1815 {
1816         /*
1817          * We treat the cursor plane as always-on for the purposes of watermark
1818          * calculation.  Until we have two-stage watermark programming merged,
1819          * this is necessary to avoid flickering.
1820          */
1821         int cpp = 4;
1822         int width = pstate->visible ? pstate->base.crtc_w : 64;
1823
1824         if (!cstate->base.active)
1825                 return 0;
1826
1827         return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1828                               cstate->base.adjusted_mode.crtc_htotal,
1829                               width, cpp, mem_value);
1830 }
1831
1832 /* Only for WM_LP. */
1833 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1834                                    const struct intel_plane_state *pstate,
1835                                    uint32_t pri_val)
1836 {
1837         int cpp = pstate->base.fb ?
1838                 drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
1839
1840         if (!cstate->base.active || !pstate->visible)
1841                 return 0;
1842
1843         return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp);
1844 }
1845
1846 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1847 {
1848         if (INTEL_INFO(dev)->gen >= 8)
1849                 return 3072;
1850         else if (INTEL_INFO(dev)->gen >= 7)
1851                 return 768;
1852         else
1853                 return 512;
1854 }
1855
1856 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1857                                          int level, bool is_sprite)
1858 {
1859         if (INTEL_INFO(dev)->gen >= 8)
1860                 /* BDW primary/sprite plane watermarks */
1861                 return level == 0 ? 255 : 2047;
1862         else if (INTEL_INFO(dev)->gen >= 7)
1863                 /* IVB/HSW primary/sprite plane watermarks */
1864                 return level == 0 ? 127 : 1023;
1865         else if (!is_sprite)
1866                 /* ILK/SNB primary plane watermarks */
1867                 return level == 0 ? 127 : 511;
1868         else
1869                 /* ILK/SNB sprite plane watermarks */
1870                 return level == 0 ? 63 : 255;
1871 }
1872
1873 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1874                                           int level)
1875 {
1876         if (INTEL_INFO(dev)->gen >= 7)
1877                 return level == 0 ? 63 : 255;
1878         else
1879                 return level == 0 ? 31 : 63;
1880 }
1881
1882 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1883 {
1884         if (INTEL_INFO(dev)->gen >= 8)
1885                 return 31;
1886         else
1887                 return 15;
1888 }
1889
1890 /* Calculate the maximum primary/sprite plane watermark */
1891 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1892                                      int level,
1893                                      const struct intel_wm_config *config,
1894                                      enum intel_ddb_partitioning ddb_partitioning,
1895                                      bool is_sprite)
1896 {
1897         unsigned int fifo_size = ilk_display_fifo_size(dev);
1898
1899         /* if sprites aren't enabled, sprites get nothing */
1900         if (is_sprite && !config->sprites_enabled)
1901                 return 0;
1902
1903         /* HSW allows LP1+ watermarks even with multiple pipes */
1904         if (level == 0 || config->num_pipes_active > 1) {
1905                 fifo_size /= INTEL_INFO(dev)->num_pipes;
1906
1907                 /*
1908                  * For some reason the non self refresh
1909                  * FIFO size is only half of the self
1910                  * refresh FIFO size on ILK/SNB.
1911                  */
1912                 if (INTEL_INFO(dev)->gen <= 6)
1913                         fifo_size /= 2;
1914         }
1915
1916         if (config->sprites_enabled) {
1917                 /* level 0 is always calculated with 1:1 split */
1918                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1919                         if (is_sprite)
1920                                 fifo_size *= 5;
1921                         fifo_size /= 6;
1922                 } else {
1923                         fifo_size /= 2;
1924                 }
1925         }
1926
1927         /* clamp to max that the registers can hold */
1928         return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1929 }
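
/*
 * Worked example (illustrative, IVB: 768 entry FIFO): level 1, a single
 * active pipe, sprites enabled, INTEL_DDB_PART_5_6:
 *   sprite:  768 * 5 / 6 = 640, min(640, 1023) = 640
 *   primary: 768 / 6     = 128, min(128, 1023) = 128
 * With INTEL_DDB_PART_1_2 both would instead get 768 / 2 = 384.
 */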
1930
1931 /* Calculate the maximum cursor plane watermark */
1932 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1933                                       int level,
1934                                       const struct intel_wm_config *config)
1935 {
1936         /* HSW LP1+ watermarks w/ multiple pipes */
1937         if (level > 0 && config->num_pipes_active > 1)
1938                 return 64;
1939
1940         /* otherwise just report max that registers can hold */
1941         return ilk_cursor_wm_reg_max(dev, level);
1942 }
1943
1944 static void ilk_compute_wm_maximums(const struct drm_device *dev,
1945                                     int level,
1946                                     const struct intel_wm_config *config,
1947                                     enum intel_ddb_partitioning ddb_partitioning,
1948                                     struct ilk_wm_maximums *max)
1949 {
1950         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1951         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1952         max->cur = ilk_cursor_wm_max(dev, level, config);
1953         max->fbc = ilk_fbc_wm_reg_max(dev);
1954 }
1955
1956 static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
1957                                         int level,
1958                                         struct ilk_wm_maximums *max)
1959 {
1960         max->pri = ilk_plane_wm_reg_max(dev, level, false);
1961         max->spr = ilk_plane_wm_reg_max(dev, level, true);
1962         max->cur = ilk_cursor_wm_reg_max(dev, level);
1963         max->fbc = ilk_fbc_wm_reg_max(dev);
1964 }
1965
1966 static bool ilk_validate_wm_level(int level,
1967                                   const struct ilk_wm_maximums *max,
1968                                   struct intel_wm_level *result)
1969 {
1970         bool ret;
1971
1972         /* already determined to be invalid? */
1973         if (!result->enable)
1974                 return false;
1975
1976         result->enable = result->pri_val <= max->pri &&
1977                          result->spr_val <= max->spr &&
1978                          result->cur_val <= max->cur;
1979
1980         ret = result->enable;
1981
1982         /*
1983          * HACK until we can pre-compute everything,
1984          * and thus fail gracefully if LP0 watermarks
1985          * are exceeded...
1986          */
1987         if (level == 0 && !result->enable) {
1988                 if (result->pri_val > max->pri)
1989                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1990                                       level, result->pri_val, max->pri);
1991                 if (result->spr_val > max->spr)
1992                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1993                                       level, result->spr_val, max->spr);
1994                 if (result->cur_val > max->cur)
1995                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1996                                       level, result->cur_val, max->cur);
1997
1998                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1999                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2000                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2001                 result->enable = true;
2002         }
2003
2004         return ret;
2005 }
2006
2007 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2008                                  const struct intel_crtc *intel_crtc,
2009                                  int level,
2010                                  struct intel_crtc_state *cstate,
2011                                  struct intel_plane_state *pristate,
2012                                  struct intel_plane_state *sprstate,
2013                                  struct intel_plane_state *curstate,
2014                                  struct intel_wm_level *result)
2015 {
2016         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2017         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2018         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2019
2020         /* WM1+ latency values stored in 0.5us units */
2021         if (level > 0) {
2022                 pri_latency *= 5;
2023                 spr_latency *= 5;
2024                 cur_latency *= 5;
2025         }
2026
2027         if (pristate) {
2028                 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2029                                                      pri_latency, level);
2030                 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2031         }
2032
2033         if (sprstate)
2034                 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2035
2036         if (curstate)
2037                 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2038
2039         result->enable = true;
2040 }
2041
2042 static uint32_t
2043 hsw_compute_linetime_wm(struct drm_device *dev,
2044                         struct intel_crtc_state *cstate)
2045 {
2046         struct drm_i915_private *dev_priv = dev->dev_private;
2047         const struct drm_display_mode *adjusted_mode =
2048                 &cstate->base.adjusted_mode;
2049         u32 linetime, ips_linetime;
2050
2051         if (!cstate->base.active)
2052                 return 0;
2053         if (WARN_ON(adjusted_mode->crtc_clock == 0))
2054                 return 0;
2055         if (WARN_ON(dev_priv->cdclk_freq == 0))
2056                 return 0;
2057
2058         /* The watermarks are computed based on how long it takes to fill a
2059          * single row at the given clock rate, multiplied by 8.
2060          */
2061         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2062                                      adjusted_mode->crtc_clock);
2063         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2064                                          dev_priv->cdclk_freq);
2065
2066         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2067                PIPE_WM_LINETIME_TIME(linetime);
2068 }
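
/*
 * Worked example (illustrative): htotal = 2200, crtc_clock = 148500 kHz:
 *   linetime = DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119
 * i.e. ~14.8 us per scanline in 1/8 us register units; ips_linetime is
 * the same calculation against cdclk_freq.
 */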
2069
2070 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
2071 {
2072         struct drm_i915_private *dev_priv = dev->dev_private;
2073
2074         if (IS_GEN9(dev)) {
2075                 uint32_t val;
2076                 int ret, i;
2077                 int level, max_level = ilk_wm_max_level(dev);
2078
2079                 /* read the first set of memory latencies[0:3] */
2080                 val = 0; /* data0 to be programmed to 0 for first set */
2081                 mutex_lock(&dev_priv->rps.hw_lock);
2082                 ret = sandybridge_pcode_read(dev_priv,
2083                                              GEN9_PCODE_READ_MEM_LATENCY,
2084                                              &val);
2085                 mutex_unlock(&dev_priv->rps.hw_lock);
2086
2087                 if (ret) {
2088                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2089                         return;
2090                 }
2091
2092                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2093                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2094                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2095                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2096                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2097                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2098                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2099
2100                 /* read the second set of memory latencies[4:7] */
2101                 val = 1; /* data0 to be programmed to 1 for second set */
2102                 mutex_lock(&dev_priv->rps.hw_lock);
2103                 ret = sandybridge_pcode_read(dev_priv,
2104                                              GEN9_PCODE_READ_MEM_LATENCY,
2105                                              &val);
2106                 mutex_unlock(&dev_priv->rps.hw_lock);
2107                 if (ret) {
2108                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2109                         return;
2110                 }
2111
2112                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2113                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2114                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2115                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2116                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2117                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2118                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2119
2120                 /*
2121                  * WaWmMemoryReadLatency:skl
2122                  *
2123                  * punit doesn't take into account the read latency so we need
2124                  * to add 2us to the various latency levels we retrieve from
2125                  * the punit.
2126                  *   - W0 is a bit special in that it's the only level that
2127                  *   can't be disabled if we want to have display working, so
2128                  *   we always add 2us there.
2129                  *   - For levels >=1, punit returns 0us latency when they are
2130                  *   disabled, so we respect that and don't add 2us then
2131                  *
2132                  * Additionally, if a level n (n >= 1) has a 0us latency, all
2133                  * levels m (m >= n) need to be disabled. We make sure to
2134                  * sanitize the values out of the punit to satisfy this
2135                  * requirement.
2136                  */
2137                 wm[0] += 2;
2138                 for (level = 1; level <= max_level; level++)
2139                         if (wm[level] != 0)
2140                                 wm[level] += 2;
2141                         else {
2142                                 for (i = level + 1; i <= max_level; i++)
2143                                         wm[i] = 0;
2144
2145                                 break;
2146                         }
2147         } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2148                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2149
2150                 wm[0] = (sskpd >> 56) & 0xFF;
2151                 if (wm[0] == 0)
2152                         wm[0] = sskpd & 0xF;
2153                 wm[1] = (sskpd >> 4) & 0xFF;
2154                 wm[2] = (sskpd >> 12) & 0xFF;
2155                 wm[3] = (sskpd >> 20) & 0x1FF;
2156                 wm[4] = (sskpd >> 32) & 0x1FF;
2157         } else if (INTEL_INFO(dev)->gen >= 6) {
2158                 uint32_t sskpd = I915_READ(MCH_SSKPD);
2159
2160                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2161                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2162                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2163                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2164         } else if (INTEL_INFO(dev)->gen >= 5) {
2165                 uint32_t mltr = I915_READ(MLTR_ILK);
2166
2167                 /* ILK primary LP0 latency is 700 ns */
2168                 wm[0] = 7;
2169                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2170                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2171         }
2172 }
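
/*
 * Example of the gen9 sanitization above (hypothetical punit values):
 * raw latencies {2, 4, 6, 0, 5, 0, 0, 0} become {4, 6, 8, 0, 0, 0, 0, 0};
 * the +2 us is applied up to the first disabled level, and everything
 * from that level on is forced off, even though the raw level 4 value
 * (5) was non-zero.
 */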
2173
2174 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2175 {
2176         /* ILK sprite LP0 latency is 1300 ns */
2177         if (INTEL_INFO(dev)->gen == 5)
2178                 wm[0] = 13;
2179 }
2180
2181 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2182 {
2183         /* ILK cursor LP0 latency is 1300 ns */
2184         if (INTEL_INFO(dev)->gen == 5)
2185                 wm[0] = 13;
2186
2187         /* WaDoubleCursorLP3Latency:ivb */
2188         if (IS_IVYBRIDGE(dev))
2189                 wm[3] *= 2;
2190 }
2191
2192 int ilk_wm_max_level(const struct drm_device *dev)
2193 {
2194         /* how many WM levels are we expecting */
2195         if (INTEL_INFO(dev)->gen >= 9)
2196                 return 7;
2197         else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2198                 return 4;
2199         else if (INTEL_INFO(dev)->gen >= 6)
2200                 return 3;
2201         else
2202                 return 2;
2203 }
2204
2205 static void intel_print_wm_latency(struct drm_device *dev,
2206                                    const char *name,
2207                                    const uint16_t wm[8])
2208 {
2209         int level, max_level = ilk_wm_max_level(dev);
2210
2211         for (level = 0; level <= max_level; level++) {
2212                 unsigned int latency = wm[level];
2213
2214                 if (latency == 0) {
2215                         DRM_ERROR("%s WM%d latency not provided\n",
2216                                   name, level);
2217                         continue;
2218                 }
2219
2220                 /*
2221                  * - latencies are in us on gen9.
2222                  * - before then, WM1+ latency values are in 0.5us units
2223                  */
2224                 if (IS_GEN9(dev))
2225                         latency *= 10;
2226                 else if (level > 0)
2227                         latency *= 5;
2228
2229                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2230                               name, level, wm[level],
2231                               latency / 10, latency % 10);
2232         }
2233 }
2234
2235 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2236                                     uint16_t wm[5], uint16_t min)
2237 {
2238         int level, max_level = ilk_wm_max_level(dev_priv->dev);
2239
2240         if (wm[0] >= min)
2241                 return false;
2242
2243         wm[0] = max(wm[0], min);
2244         for (level = 1; level <= max_level; level++)
2245                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2246
2247         return true;
2248 }
2249
2250 static void snb_wm_latency_quirk(struct drm_device *dev)
2251 {
2252         struct drm_i915_private *dev_priv = dev->dev_private;
2253         bool changed;
2254
2255         /*
2256          * The BIOS provided WM memory latency values are often
2257          * inadequate for high resolution displays. Adjust them.
2258          */
2259         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2260                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2261                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2262
2263         if (!changed)
2264                 return;
2265
2266         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2267         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2268         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2269         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2270 }
2271
2272 static void ilk_setup_wm_latency(struct drm_device *dev)
2273 {
2274         struct drm_i915_private *dev_priv = dev->dev_private;
2275
2276         intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2277
2278         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2279                sizeof(dev_priv->wm.pri_latency));
2280         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2281                sizeof(dev_priv->wm.pri_latency));
2282
2283         intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2284         intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2285
2286         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2287         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2288         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2289
2290         if (IS_GEN6(dev))
2291                 snb_wm_latency_quirk(dev);
2292 }
2293
2294 static void skl_setup_wm_latency(struct drm_device *dev)
2295 {
2296         struct drm_i915_private *dev_priv = dev->dev_private;
2297
2298         intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
2299         intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
2300 }
2301
2302 static bool ilk_validate_pipe_wm(struct drm_device *dev,
2303                                  struct intel_pipe_wm *pipe_wm)
2304 {
2305         /* LP0 watermark maximums depend on this pipe alone */
2306         const struct intel_wm_config config = {
2307                 .num_pipes_active = 1,
2308                 .sprites_enabled = pipe_wm->sprites_enabled,
2309                 .sprites_scaled = pipe_wm->sprites_scaled,
2310         };
2311         struct ilk_wm_maximums max;
2312
2313         /* LP0 watermarks always use 1/2 DDB partitioning */
2314         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2315
2316         /* At least LP0 must be valid */
2317         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
2318                 DRM_DEBUG_KMS("LP0 watermark invalid\n");
2319                 return false;
2320         }
2321
2322         return true;
2323 }
2324
2325 /* Compute new watermarks for the pipe */
2326 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
2327 {
2328         struct drm_atomic_state *state = cstate->base.state;
2329         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
2330         struct intel_pipe_wm *pipe_wm;
2331         struct drm_device *dev = state->dev;
2332         const struct drm_i915_private *dev_priv = dev->dev_private;
2333         struct intel_plane *intel_plane;
2334         struct intel_plane_state *pristate = NULL;
2335         struct intel_plane_state *sprstate = NULL;
2336         struct intel_plane_state *curstate = NULL;
2337         int level, max_level = ilk_wm_max_level(dev), usable_level;
2338         struct ilk_wm_maximums max;
2339
2340         pipe_wm = &cstate->wm.optimal.ilk;
2341
2342         for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
2343                 struct intel_plane_state *ps;
2344
2345                 ps = intel_atomic_get_existing_plane_state(state,
2346                                                            intel_plane);
2347                 if (!ps)
2348                         continue;
2349
2350                 if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY)
2351                         pristate = ps;
2352                 else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY)
2353                         sprstate = ps;
2354                 else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
2355                         curstate = ps;
2356         }
2357
2358         pipe_wm->pipe_enabled = cstate->base.active;
2359         if (sprstate) {
2360                 pipe_wm->sprites_enabled = sprstate->visible;
2361                 pipe_wm->sprites_scaled = sprstate->visible &&
2362                         (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 ||
2363                          drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
2364         }
2365
2366         usable_level = max_level;
2367
2368         /* ILK/SNB: LP2+ watermarks only w/o sprites */
2369         if (INTEL_INFO(dev)->gen <= 6 && pipe_wm->sprites_enabled)
2370                 usable_level = 1;
2371
2372         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2373         if (pipe_wm->sprites_scaled)
2374                 usable_level = 0;
2375
2376         ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
2377                              pristate, sprstate, curstate, &pipe_wm->raw_wm[0]);
2378
2379         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
2380         pipe_wm->wm[0] = pipe_wm->raw_wm[0];
2381
2382         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2383                 pipe_wm->linetime = hsw_compute_linetime_wm(dev, cstate);
2384
2385         if (!ilk_validate_pipe_wm(dev, pipe_wm))
2386                 return -EINVAL;
2387
2388         ilk_compute_wm_reg_maximums(dev, 1, &max);
2389
2390         for (level = 1; level <= max_level; level++) {
2391                 struct intel_wm_level *wm = &pipe_wm->raw_wm[level];
2392
2393                 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
2394                                      pristate, sprstate, curstate, wm);
2395
2396                 /*
2397                  * Disable any watermark level that exceeds the
2398                  * register maximums since such watermarks are
2399                  * always invalid.
2400                  */
2401                 if (level > usable_level)
2402                         continue;
2403
2404                 if (ilk_validate_wm_level(level, &max, wm))
2405                         pipe_wm->wm[level] = *wm;
2406                 else
2407                         usable_level = level;
2408         }
2409
2410         return 0;
2411 }
2412
2413 /*
2414  * Build a set of 'intermediate' watermark values that satisfy both the old
2415  * state and the new state.  These can be programmed to the hardware
2416  * immediately.
2417  */
2418 static int ilk_compute_intermediate_wm(struct drm_device *dev,
2419                                        struct intel_crtc *intel_crtc,
2420                                        struct intel_crtc_state *newstate)
2421 {
2422         struct intel_pipe_wm *a = &newstate->wm.intermediate;
2423         struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk;
2424         int level, max_level = ilk_wm_max_level(dev);
2425
2426         /*
2427          * Start with the final, target watermarks, then combine with the
2428          * currently active watermarks to get values that are safe both before
2429          * and after the vblank.
2430          */
2431         *a = newstate->wm.optimal.ilk;
2432         a->pipe_enabled |= b->pipe_enabled;
2433         a->sprites_enabled |= b->sprites_enabled;
2434         a->sprites_scaled |= b->sprites_scaled;
2435
2436         for (level = 0; level <= max_level; level++) {
2437                 struct intel_wm_level *a_wm = &a->wm[level];
2438                 const struct intel_wm_level *b_wm = &b->wm[level];
2439
2440                 a_wm->enable &= b_wm->enable;
2441                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
2442                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
2443                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
2444                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
2445         }
2446
2447         /*
2448          * We need to make sure that these merged watermark values are
2449          * actually a valid configuration themselves.  If they're not,
2450          * there's no safe way to transition from the old state to
2451          * the new state, so we need to fail the atomic transaction.
2452          */
2453         if (!ilk_validate_pipe_wm(dev, a))
2454                 return -EINVAL;
2455
2456         /*
2457          * If our intermediate WM are identical to the final WM, then we can
2458          * omit the post-vblank programming; only update if it's different.
2459          */
2460         if (memcmp(a, &newstate->wm.optimal.ilk, sizeof(*a)) == 0)
2461                 newstate->wm.need_postvbl_update = false;
2462
2463         return 0;
2464 }
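
/*
 * Example (illustrative): if the currently active WM0 is {pri 50, cur 20}
 * and the new optimal WM0 is {pri 30, cur 25}, the intermediate WM0
 * becomes {pri 50, cur 25}: safe for the old plane setup before the
 * vblank and for the new one after it.
 */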
2465
2466 /*
2467  * Merge the watermarks from all active pipes for a specific level.
2468  */
2469 static void ilk_merge_wm_level(struct drm_device *dev,
2470                                int level,
2471                                struct intel_wm_level *ret_wm)
2472 {
2473         const struct intel_crtc *intel_crtc;
2474
2475         ret_wm->enable = true;
2476
2477         for_each_intel_crtc(dev, intel_crtc) {
2478                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
2479                 const struct intel_wm_level *wm = &active->wm[level];
2480
2481                 if (!active->pipe_enabled)
2482                         continue;
2483
2484                 /*
2485                  * The watermark values may have been used in the past,
2486                  * so we must maintain them in the registers for some
2487                  * time even if the level is now disabled.
2488                  */
2489                 if (!wm->enable)
2490                         ret_wm->enable = false;
2491
2492                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2493                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2494                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2495                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2496         }
2497 }
2498
2499 /*
2500  * Merge all low power watermarks for all active pipes.
2501  */
2502 static void ilk_wm_merge(struct drm_device *dev,
2503                          const struct intel_wm_config *config,
2504                          const struct ilk_wm_maximums *max,
2505                          struct intel_pipe_wm *merged)
2506 {
2507         struct drm_i915_private *dev_priv = dev->dev_private;
2508         int level, max_level = ilk_wm_max_level(dev);
2509         int last_enabled_level = max_level;
2510
2511         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2512         if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2513             config->num_pipes_active > 1)
2514                 last_enabled_level = 0;
2515
2516         /* ILK: FBC WM must be disabled always */
2517         merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2518
2519         /* merge each WM1+ level */
2520         for (level = 1; level <= max_level; level++) {
2521                 struct intel_wm_level *wm = &merged->wm[level];
2522
2523                 ilk_merge_wm_level(dev, level, wm);
2524
2525                 if (level > last_enabled_level)
2526                         wm->enable = false;
2527                 else if (!ilk_validate_wm_level(level, max, wm))
2528                         /* make sure all following levels get disabled */
2529                         last_enabled_level = level - 1;
2530
2531                 /*
2532                  * The spec says it is preferred to disable
2533                  * FBC WMs instead of disabling a WM level.
2534                  */
2535                 if (wm->fbc_val > max->fbc) {
2536                         if (wm->enable)
2537                                 merged->fbc_wm_enabled = false;
2538                         wm->fbc_val = 0;
2539                 }
2540         }
2541
2542         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2543         /*
2544          * FIXME this is racy. FBC might get enabled later.
2545          * What we should check here is whether FBC can be
2546          * enabled sometime later.
2547          */
2548         if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
2549             intel_fbc_is_active(dev_priv)) {
2550                 for (level = 2; level <= max_level; level++) {
2551                         struct intel_wm_level *wm = &merged->wm[level];
2552
2553                         wm->enable = false;
2554                 }
2555         }
2556 }
2557
2558 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2559 {
2560         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2561         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2562 }
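
/*
 * Example: with wm[4] enabled (the HSW/BDW five level case) LP1/LP2/LP3
 * map to levels 1/3/4; otherwise they map to levels 1/2/3.
 */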
2563
2564 /* The value we need to program into the WM_LPx latency field */
2565 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2566 {
2567         struct drm_i915_private *dev_priv = dev->dev_private;
2568
2569         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2570                 return 2 * level;
2571         else
2572                 return dev_priv->wm.pri_latency[level];
2573 }
2574
2575 static void ilk_compute_wm_results(struct drm_device *dev,
2576                                    const struct intel_pipe_wm *merged,
2577                                    enum intel_ddb_partitioning partitioning,
2578                                    struct ilk_wm_values *results)
2579 {
2580         struct intel_crtc *intel_crtc;
2581         int level, wm_lp;
2582
2583         results->enable_fbc_wm = merged->fbc_wm_enabled;
2584         results->partitioning = partitioning;
2585
2586         /* LP1+ register values */
2587         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2588                 const struct intel_wm_level *r;
2589
2590                 level = ilk_wm_lp_to_level(wm_lp, merged);
2591
2592                 r = &merged->wm[level];
2593
2594                 /*
2595                  * Maintain the watermark values even if the level is
2596                  * disabled. Doing otherwise could cause underruns.
2597                  */
2598                 results->wm_lp[wm_lp - 1] =
2599                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2600                         (r->pri_val << WM1_LP_SR_SHIFT) |
2601                         r->cur_val;
2602
2603                 if (r->enable)
2604                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2605
2606                 if (INTEL_INFO(dev)->gen >= 8)
2607                         results->wm_lp[wm_lp - 1] |=
2608                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2609                 else
2610                         results->wm_lp[wm_lp - 1] |=
2611                                 r->fbc_val << WM1_LP_FBC_SHIFT;
2612
2613                 /*
2614                  * Always set WM1S_LP_EN when spr_val != 0, even if the
2615                  * level is disabled. Doing otherwise could cause underruns.
2616                  */
2617                 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2618                         WARN_ON(wm_lp != 1);
2619                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2620                 } else
2621                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2622         }
2623
2624         /* LP0 register values */
2625         for_each_intel_crtc(dev, intel_crtc) {
2626                 enum pipe pipe = intel_crtc->pipe;
2627                 const struct intel_wm_level *r =
2628                         &intel_crtc->wm.active.ilk.wm[0];
2629
2630                 if (WARN_ON(!r->enable))
2631                         continue;
2632
2633                 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
2634
2635                 results->wm_pipe[pipe] =
2636                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2637                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2638                         r->cur_val;
2639         }
2640 }
2641
2642 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
2643  * case both are at the same level. Prefer r1 in case they're the same. */
2644 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2645                                                   struct intel_pipe_wm *r1,
2646                                                   struct intel_pipe_wm *r2)
2647 {
2648         int level, max_level = ilk_wm_max_level(dev);
2649         int level1 = 0, level2 = 0;
2650
2651         for (level = 1; level <= max_level; level++) {
2652                 if (r1->wm[level].enable)
2653                         level1 = level;
2654                 if (r2->wm[level].enable)
2655                         level2 = level;
2656         }
2657
2658         if (level1 == level2) {
2659                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2660                         return r2;
2661                 else
2662                         return r1;
2663         } else if (level1 > level2) {
2664                 return r1;
2665         } else {
2666                 return r2;
2667         }
2668 }
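
/*
 * Illustrative example (hypothetical merge results): if r1 has WM1-WM2
 * enabled (level1 == 2) while r2 has WM1-WM3 enabled (level2 == 3), r2
 * wins on depth.  If both top out at the same level, fbc_wm_enabled
 * breaks the tie, and r1 is preferred when that is equal as well.
 */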
2669
2670 /* dirty bits used to track which watermarks need changes */
2671 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2672 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2673 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2674 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2675 #define WM_DIRTY_FBC (1 << 24)
2676 #define WM_DIRTY_DDB (1 << 25)
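
/*
 * Resulting bitmask layout, as derived from the macros above: bits 0-2
 * are the per-pipe WM0 bits, bits 8-10 the per-pipe linetime bits,
 * bits 16-18 LP1-LP3, bit 24 FBC and bit 25 DDB.  The gaps in between
 * are simply unused.
 */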
2677
2678 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2679                                          const struct ilk_wm_values *old,
2680                                          const struct ilk_wm_values *new)
2681 {
2682         unsigned int dirty = 0;
2683         enum pipe pipe;
2684         int wm_lp;
2685
2686         for_each_pipe(dev_priv, pipe) {
2687                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2688                         dirty |= WM_DIRTY_LINETIME(pipe);
2689                         /* Must disable LP1+ watermarks too */
2690                         dirty |= WM_DIRTY_LP_ALL;
2691                 }
2692
2693                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2694                         dirty |= WM_DIRTY_PIPE(pipe);
2695                         /* Must disable LP1+ watermarks too */
2696                         dirty |= WM_DIRTY_LP_ALL;
2697                 }
2698         }
2699
2700         if (old->enable_fbc_wm != new->enable_fbc_wm) {
2701                 dirty |= WM_DIRTY_FBC;
2702                 /* Must disable LP1+ watermarks too */
2703                 dirty |= WM_DIRTY_LP_ALL;
2704         }
2705
2706         if (old->partitioning != new->partitioning) {
2707                 dirty |= WM_DIRTY_DDB;
2708                 /* Must disable LP1+ watermarks too */
2709                 dirty |= WM_DIRTY_LP_ALL;
2710         }
2711
2712         /* LP1+ watermarks already deemed dirty, no need to continue */
2713         if (dirty & WM_DIRTY_LP_ALL)
2714                 return dirty;
2715
2716         /* Find the lowest numbered LP1+ watermark in need of an update... */
2717         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2718                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2719                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2720                         break;
2721         }
2722
2723         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2724         for (; wm_lp <= 3; wm_lp++)
2725                 dirty |= WM_DIRTY_LP(wm_lp);
2726
2727         return dirty;
2728 }
2729
2730 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2731                                unsigned int dirty)
2732 {
2733         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2734         bool changed = false;
2735
2736         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2737                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2738                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2739                 changed = true;
2740         }
2741         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2742                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2743                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2744                 changed = true;
2745         }
2746         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2747                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2748                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2749                 changed = true;
2750         }
2751
2752         /*
2753          * Don't touch WM1S_LP_EN here.
2754          * Doing so could cause underruns.
2755          */
2756
2757         return changed;
2758 }
2759
2760 /*
2761  * The spec says we shouldn't write when we don't need to, because every
2762  * write causes WMs to be re-evaluated, expending some power.
2763  */
2764 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2765                                 struct ilk_wm_values *results)
2766 {
2767         struct drm_device *dev = dev_priv->dev;
2768         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2769         unsigned int dirty;
2770         uint32_t val;
2771
2772         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2773         if (!dirty)
2774                 return;
2775
2776         _ilk_disable_lp_wm(dev_priv, dirty);
2777
2778         if (dirty & WM_DIRTY_PIPE(PIPE_A))
2779                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2780         if (dirty & WM_DIRTY_PIPE(PIPE_B))
2781                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2782         if (dirty & WM_DIRTY_PIPE(PIPE_C))
2783                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2784
2785         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2786                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2787         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2788                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2789         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2790                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2791
2792         if (dirty & WM_DIRTY_DDB) {
2793                 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2794                         val = I915_READ(WM_MISC);
2795                         if (results->partitioning == INTEL_DDB_PART_1_2)
2796                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
2797                         else
2798                                 val |= WM_MISC_DATA_PARTITION_5_6;
2799                         I915_WRITE(WM_MISC, val);
2800                 } else {
2801                         val = I915_READ(DISP_ARB_CTL2);
2802                         if (results->partitioning == INTEL_DDB_PART_1_2)
2803                                 val &= ~DISP_DATA_PARTITION_5_6;
2804                         else
2805                                 val |= DISP_DATA_PARTITION_5_6;
2806                         I915_WRITE(DISP_ARB_CTL2, val);
2807                 }
2808         }
2809
2810         if (dirty & WM_DIRTY_FBC) {
2811                 val = I915_READ(DISP_ARB_CTL);
2812                 if (results->enable_fbc_wm)
2813                         val &= ~DISP_FBC_WM_DIS;
2814                 else
2815                         val |= DISP_FBC_WM_DIS;
2816                 I915_WRITE(DISP_ARB_CTL, val);
2817         }
2818
2819         if (dirty & WM_DIRTY_LP(1) &&
2820             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2821                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2822
2823         if (INTEL_INFO(dev)->gen >= 7) {
2824                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2825                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2826                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2827                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2828         }
2829
2830         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2831                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2832         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2833                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2834         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2835                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2836
2837         dev_priv->wm.hw = *results;
2838 }
2839
2840 bool ilk_disable_lp_wm(struct drm_device *dev)
2841 {
2842         struct drm_i915_private *dev_priv = dev->dev_private;
2843
2844         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2845 }
2846
2847 /*
2848  * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2849  * different active planes.
2850  */
2851
2852 #define SKL_DDB_SIZE            896     /* in blocks */
2853 #define BXT_DDB_SIZE            512     /* in blocks */
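
/*
 * Worked example of the split done below (illustrative): on SKL with
 * two active pipes the usable space is 896 - 4 = 892 blocks, so each
 * pipe gets 892 / 2 = 446 blocks; the first active pipe spans
 * [0, 446) and the second [446, 892).
 */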
2854
2855 /*
2856  * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
2857  * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and
2858  * other universal planes are in indices 1..n.  Note that this may leave unused
2859  * indices between the top "sprite" plane and the cursor.
2860  */
2861 static int
2862 skl_wm_plane_id(const struct intel_plane *plane)
2863 {
2864         switch (plane->base.type) {
2865         case DRM_PLANE_TYPE_PRIMARY:
2866                 return 0;
2867         case DRM_PLANE_TYPE_CURSOR:
2868                 return PLANE_CURSOR;
2869         case DRM_PLANE_TYPE_OVERLAY:
2870                 return plane->plane + 1;
2871         default:
2872                 MISSING_CASE(plane->base.type);
2873                 return plane->plane;
2874         }
2875 }
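
/*
 * Example mapping on a pipe with two sprites: primary -> 0,
 * sprite 0 -> 1, sprite 1 -> 2, cursor -> PLANE_CURSOR; any
 * indices in between stay unused, as noted above.
 */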
2876
2877 static void
2878 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2879                                    const struct intel_crtc_state *cstate,
2880                                    const struct intel_wm_config *config,
2881                                    struct skl_ddb_entry *alloc /* out */)
2882 {
2883         struct drm_crtc *for_crtc = cstate->base.crtc;
2884         struct drm_crtc *crtc;
2885         unsigned int pipe_size, ddb_size;
2886         int nth_active_pipe;
2887
2888         if (!cstate->base.active) {
2889                 alloc->start = 0;
2890                 alloc->end = 0;
2891                 return;
2892         }
2893
2894         if (IS_BROXTON(dev))
2895                 ddb_size = BXT_DDB_SIZE;
2896         else
2897                 ddb_size = SKL_DDB_SIZE;
2898
2899         ddb_size -= 4; /* 4 blocks for bypass path allocation */
2900
2901         nth_active_pipe = 0;
2902         for_each_crtc(dev, crtc) {
2903                 if (!to_intel_crtc(crtc)->active)
2904                         continue;
2905
2906                 if (crtc == for_crtc)
2907                         break;
2908
2909                 nth_active_pipe++;
2910         }
2911
2912         pipe_size = ddb_size / config->num_pipes_active;
2913         alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2914         alloc->end = alloc->start + pipe_size;
2915 }
2916
2917 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2918 {
2919         if (config->num_pipes_active == 1)
2920                 return 32;
2921
2922         return 8;
2923 }
2924
2925 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2926 {
2927         entry->start = reg & 0x3ff;
2928         entry->end = (reg >> 16) & 0x3ff;
2929         if (entry->end)
2930                 entry->end += 1;
2931 }
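
/*
 * Example decode (hypothetical register value): reg == 0x01bd0032
 * gives start = 0x032 = 50 and end = 0x1bd + 1 = 446; an all-zero
 * register leaves the entry empty since the end field is 0.
 */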
2932
2933 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2934                           struct skl_ddb_allocation *ddb /* out */)
2935 {
2936         enum pipe pipe;
2937         int plane;
2938         u32 val;
2939
2940         memset(ddb, 0, sizeof(*ddb));
2941
2942         for_each_pipe(dev_priv, pipe) {
2943                 enum intel_display_power_domain power_domain;
2944
2945                 power_domain = POWER_DOMAIN_PIPE(pipe);
2946                 if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
2947                         continue;
2948
2949                 for_each_plane(dev_priv, pipe, plane) {
2950                         val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2951                         skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2952                                                    val);
2953                 }
2954
2955                 val = I915_READ(CUR_BUF_CFG(pipe));
2956                 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
2957                                            val);
2958
2959                 intel_display_power_put(dev_priv, power_domain);
2960         }
2961 }
2962
2963 static unsigned int
2964 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
2965                              const struct drm_plane_state *pstate,
2966                              int y)
2967 {
2968         struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
2969         struct drm_framebuffer *fb = pstate->fb;
2970         uint32_t width = 0, height = 0;
2971
2972         width = drm_rect_width(&intel_pstate->src) >> 16;
2973         height = drm_rect_height(&intel_pstate->src) >> 16;
2974
2975         if (intel_rotation_90_or_270(pstate->rotation))
2976                 swap(width, height);
2977
2978         /* for planar format */
2979         if (fb->pixel_format == DRM_FORMAT_NV12) {
2980                 if (y)  /* y-plane data rate */
2981                         return width * height *
2982                                 drm_format_plane_cpp(fb->pixel_format, 0);
2983                 else    /* uv-plane data rate */
2984                         return (width / 2) * (height / 2) *
2985                                 drm_format_plane_cpp(fb->pixel_format, 1);
2986         }
2987
2988         /* for packed formats */
2989         return width * height * drm_format_plane_cpp(fb->pixel_format, 0);
2990 }
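
/*
 * Worked example (illustrative): a 1920x1080 NV12 plane yields a
 * y-plane rate of 1920 * 1080 * 1 = 2073600 and a uv-plane rate of
 * (1920 / 2) * (1080 / 2) * 2 = 1036800, while the same size in a
 * packed 32bpp format would be 1920 * 1080 * 4 = 8294400.
 */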
2991
2992 /*
2993  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2994  * an 8192x4096@32bpp framebuffer:
2995  *   3 * 4096 * 8192 * 4 < 2^32
2996  */
2997 static unsigned int
2998 skl_get_total_relative_data_rate(const struct intel_crtc_state *cstate)
2999 {
3000         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3001         struct drm_device *dev = intel_crtc->base.dev;
3002         const struct intel_plane *intel_plane;
3003         unsigned int total_data_rate = 0;
3004
3005         for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
3006                 const struct drm_plane_state *pstate = intel_plane->base.state;
3007
3008                 if (pstate->fb == NULL)
3009                         continue;
3010
3011                 if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
3012                         continue;
3013
3014                 /* packed/uv */
3015                 total_data_rate += skl_plane_relative_data_rate(cstate,
3016                                                                 pstate,
3017                                                                 0);
3018
3019                 if (pstate->fb->pixel_format == DRM_FORMAT_NV12)
3020                         /* y-plane */
3021                         total_data_rate += skl_plane_relative_data_rate(cstate,
3022                                                                         pstate,
3023                                                                         1);
3024         }
3025
3026         return total_data_rate;
3027 }
3028
3029 static void
3030 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
3031                       struct skl_ddb_allocation *ddb /* out */)
3032 {
3033         struct drm_crtc *crtc = cstate->base.crtc;
3034         struct drm_device *dev = crtc->dev;
3035         struct drm_i915_private *dev_priv = to_i915(dev);
3036         struct intel_wm_config *config = &dev_priv->wm.config;
3037         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3038         struct intel_plane *intel_plane;
3039         enum pipe pipe = intel_crtc->pipe;
3040         struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
3041         uint16_t alloc_size, start, cursor_blocks;
3042         uint16_t minimum[I915_MAX_PLANES];
3043         uint16_t y_minimum[I915_MAX_PLANES];
3044         unsigned int total_data_rate;
3045
3046         skl_ddb_get_pipe_allocation_limits(dev, cstate, config, alloc);
3047         alloc_size = skl_ddb_entry_size(alloc);
3048         if (alloc_size == 0) {
3049                 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
3050                 memset(&ddb->plane[pipe][PLANE_CURSOR], 0,
3051                        sizeof(ddb->plane[pipe][PLANE_CURSOR]));
3052                 return;
3053         }
3054
3055         cursor_blocks = skl_cursor_allocation(config);
3056         ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks;
3057         ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
3058
3059         alloc_size -= cursor_blocks;
3060         alloc->end -= cursor_blocks;
3061
3062         /* 1. Allocate the minimum required blocks for each active plane */
3063         for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
3064                 struct drm_plane *plane = &intel_plane->base;
3065                 struct drm_framebuffer *fb = plane->state->fb;
3066                 int id = skl_wm_plane_id(intel_plane);
3067
3068                 if (!to_intel_plane_state(plane->state)->visible)
3069                         continue;
3070
3071                 if (plane->type == DRM_PLANE_TYPE_CURSOR)
3072                         continue;
3073
3074                 minimum[id] = 8;
3075                 alloc_size -= minimum[id];
3076                 y_minimum[id] = (fb->pixel_format == DRM_FORMAT_NV12) ? 8 : 0;
3077                 alloc_size -= y_minimum[id];
3078         }
3079
3080         /*
3081          * 2. Distribute the remaining space in proportion to the amount of
3082          * data each plane needs to fetch from memory.
3083          *
3084          * FIXME: we may not allocate every single block here.
3085          */
3086         total_data_rate = skl_get_total_relative_data_rate(cstate);
3087
3088         start = alloc->start;
3089         for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
3090                 struct drm_plane *plane = &intel_plane->base;
3091                 struct drm_plane_state *pstate = intel_plane->base.state;
3092                 unsigned int data_rate, y_data_rate;
3093                 uint16_t plane_blocks, y_plane_blocks = 0;
3094                 int id = skl_wm_plane_id(intel_plane);
3095
3096                 if (!to_intel_plane_state(pstate)->visible)
3097                         continue;
3098                 if (plane->type == DRM_PLANE_TYPE_CURSOR)
3099                         continue;
3100
3101                 data_rate = skl_plane_relative_data_rate(cstate, pstate, 0);
3102
3103                 /*
3104                  * Allocation for packed formats, or the uv-plane part of a planar
3105                  * format: promote the expression to 64 bits to avoid overflow; the
3106                  * result is < alloc_size since data_rate / total_data_rate < 1.
3107                  */
3108                 plane_blocks = minimum[id];
3109                 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
3110                                         total_data_rate);
3111
3112                 ddb->plane[pipe][id].start = start;
3113                 ddb->plane[pipe][id].end = start + plane_blocks;
3114
3115                 start += plane_blocks;
3116
3117                 /*
3118                  * Allocation for the y-plane part of a planar format:
3119                  */
3120                 if (pstate->fb->pixel_format == DRM_FORMAT_NV12) {
3121                         y_data_rate = skl_plane_relative_data_rate(cstate,
3122                                                                    pstate,
3123                                                                    1);
3124                         y_plane_blocks = y_minimum[id];
3125                         y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
3126                                                 total_data_rate);
3127
3128                         ddb->y_plane[pipe][id].start = start;
3129                         ddb->y_plane[pipe][id].end = start + y_plane_blocks;
3130
3131                         start += y_plane_blocks;
3132                 }
3133
3134         }
3135
3136 }
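
/*
 * Worked example of the two steps above (illustrative numbers): with a
 * single active pipe on SKL the whole 892-block DDB is ours and the
 * 32-block cursor leaves 860 blocks.  Two visible non-NV12 planes then
 * reserve 8 blocks each, leaving 844 to distribute; if plane A fetches
 * three times the data of plane B, A ends up with 8 + 844 * 3/4 = 641
 * blocks and B with 8 + 844 * 1/4 = 219.
 */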
3137
3138 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
3139 {
3140         /* TODO: Take into account the scalers once we support them */
3141         return config->base.adjusted_mode.crtc_clock;
3142 }
3143
3144 /*
3145  * The max latency should be 257 (the max the punit can encode is 255 and we
3146  * add 2us for the read latency) and cpp should always be <= 8, which
3147  * allows pixel rates up to ~2 GHz; that seems sufficient since the max
3148  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
3149  */
3150 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
3151 {
3152         uint32_t wm_intermediate_val, ret;
3153
3154         if (latency == 0)
3155                 return UINT_MAX;
3156
3157         wm_intermediate_val = latency * pixel_rate * cpp / 512;
3158         ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
3159
3160         return ret;
3161 }
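
/*
 * Worked example (illustrative numbers, assuming latency in us and
 * pixel_rate in kHz as the final /1000 implies): latency = 5,
 * pixel_rate = 148500 (1080p60), cpp = 4 gives
 * 5 * 148500 * 4 / 512 = 5800 and DIV_ROUND_UP(5800, 1000) = 6 blocks.
 */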
3162
3163 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
3164                                uint32_t horiz_pixels, uint8_t cpp,
3165                                uint64_t tiling, uint32_t latency)
3166 {
3167         uint32_t ret;
3168         uint32_t plane_bytes_per_line, plane_blocks_per_line;
3169         uint32_t wm_intermediate_val;
3170
3171         if (latency == 0)
3172                 return UINT_MAX;
3173
3174         plane_bytes_per_line = horiz_pixels * cpp;
3175
3176         if (tiling == I915_FORMAT_MOD_Y_TILED ||
3177             tiling == I915_FORMAT_MOD_Yf_TILED) {
3178                 plane_bytes_per_line *= 4;
3179                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3180                 plane_blocks_per_line /= 4;
3181         } else {
3182                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3183         }
3184
3185         wm_intermediate_val = latency * pixel_rate;
3186         ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
3187                                 plane_blocks_per_line;
3188
3189         return ret;
3190 }
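
/*
 * Worked example for the linear case, same illustrative numbers:
 * 1920 pixels * 4 cpp = 7680 bytes per line, i.e. DIV_ROUND_UP(7680,
 * 512) = 15 blocks per line.  With latency = 5 us, pixel_rate =
 * 148500 kHz and htotal = 2200, DIV_ROUND_UP(5 * 148500, 2200 * 1000)
 * = 1 line of latency, so method2 = 1 * 15 = 15 blocks.  The Y-tiled
 * path merely rounds over 4 lines at once, so partial blocks amortize
 * across the tile height.
 */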
3191
3192 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
3193                                        const struct intel_crtc *intel_crtc)
3194 {
3195         struct drm_device *dev = intel_crtc->base.dev;
3196         struct drm_i915_private *dev_priv = dev->dev_private;
3197         const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
3198
3199         /*
3200          * If the ddb allocation of the pipes changed, the watermarks may
3201          * need to be recalculated.
3202          */
3203         if (memcmp(new_ddb->pipe, cur_ddb->pipe, sizeof(new_ddb->pipe)))
3204                 return true;
3205
3206         return false;
3207 }
3208
3209 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3210                                  struct intel_crtc_state *cstate,
3211                                  struct intel_plane *intel_plane,
3212                                  uint16_t ddb_allocation,
3213                                  int level,
3214                                  uint16_t *out_blocks, /* out */
3215                                  uint8_t *out_lines /* out */)
3216 {
3217         struct drm_plane *plane = &intel_plane->base;
3218         struct drm_framebuffer *fb = plane->state->fb;
3219         struct intel_plane_state *intel_pstate =
3220                                         to_intel_plane_state(plane->state);
3221         uint32_t latency = dev_priv->wm.skl_latency[level];
3222         uint32_t method1, method2;
3223         uint32_t plane_bytes_per_line, plane_blocks_per_line;
3224         uint32_t res_blocks, res_lines;
3225         uint32_t selected_result;
3226         uint8_t cpp;
3227         uint32_t width = 0, height = 0;
3228
3229         if (latency == 0 || !cstate->base.active || !intel_pstate->visible)
3230                 return false;
3231
3232         width = drm_rect_width(&intel_pstate->src) >> 16;
3233         height = drm_rect_height(&intel_pstate->src) >> 16;
3234
3235         if (intel_rotation_90_or_270(plane->state->rotation))
3236                 swap(width, height);
3237
3238         cpp = drm_format_plane_cpp(fb->pixel_format, 0);
3239         method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate),
3240                                  cpp, latency);
3241         method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate),
3242                                  cstate->base.adjusted_mode.crtc_htotal,
3243                                  width,
3244                                  cpp,
3245                                  fb->modifier[0],
3246                                  latency);
3247
3248         plane_bytes_per_line = width * cpp;
3249         plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3250
3251         if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
3252             fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
3253                 uint32_t min_scanlines = 4;
3254                 uint32_t y_tile_minimum;
3255                 if (intel_rotation_90_or_270(plane->state->rotation)) {
3256                         int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
3257                                 drm_format_plane_cpp(fb->pixel_format, 1) :
3258                                 drm_format_plane_cpp(fb->pixel_format, 0);
3259
3260                         switch (cpp) {
3261                         case 1:
3262                                 min_scanlines = 16;
3263                                 break;
3264                         case 2:
3265                                 min_scanlines = 8;
3266                                 break;
3267                         case 8:
3268                                 WARN(1, "Unsupported pixel depth for rotation");
3269                         }
3270                 }
3271                 y_tile_minimum = plane_blocks_per_line * min_scanlines;
3272                 selected_result = max(method2, y_tile_minimum);
3273         } else {
3274                 if ((ddb_allocation / plane_blocks_per_line) >= 1)
3275                         selected_result = min(method1, method2);
3276                 else
3277                         selected_result = method1;
3278         }
3279
3280         res_blocks = selected_result + 1;
3281         res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
3282
3283         if (level >= 1 && level <= 7) {
3284                 if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
3285                     fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)
3286                         res_lines += 4;
3287                 else
3288                         res_blocks++;
3289         }
3290
3291         if (res_blocks >= ddb_allocation || res_lines > 31)
3292                 return false;
3293
3294         *out_blocks = res_blocks;
3295         *out_lines = res_lines;
3296
3297         return true;
3298 }
3299
3300 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
3301                                  struct skl_ddb_allocation *ddb,
3302                                  struct intel_crtc_state *cstate,
3303                                  int level,
3304                                  struct skl_wm_level *result)
3305 {
3306         struct drm_device *dev = dev_priv->dev;
3307         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3308         struct intel_plane *intel_plane;
3309         uint16_t ddb_blocks;
3310         enum pipe pipe = intel_crtc->pipe;
3311
3312         for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
3313                 int i = skl_wm_plane_id(intel_plane);
3314
3315                 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
3316
3317                 result->plane_en[i] = skl_compute_plane_wm(dev_priv,
3318                                                 cstate,
3319                                                 intel_plane,
3320                                                 ddb_blocks,
3321                                                 level,
3322                                                 &result->plane_res_b[i],
3323                                                 &result->plane_res_l[i]);
3324         }
3325 }
3326
3327 static uint32_t
3328 skl_compute_linetime_wm(struct intel_crtc_state *cstate)
3329 {
3330         if (!cstate->base.active)
3331                 return 0;
3332
3333         if (WARN_ON(skl_pipe_pixel_rate(cstate) == 0))
3334                 return 0;
3335
3336         return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000,
3337                             skl_pipe_pixel_rate(cstate));
3338 }
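
/*
 * Worked example (illustrative): htotal = 2200 at a 148500 kHz pixel
 * rate gives DIV_ROUND_UP(8 * 2200 * 1000, 148500) = 119, i.e. a line
 * time of 119/8 ~= 14.8 us, assuming the 8x factor encodes the
 * register's 1/8 us fixed-point format.
 */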
3339
3340 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
3341                                       struct skl_wm_level *trans_wm /* out */)
3342 {
3343         struct drm_crtc *crtc = cstate->base.crtc;
3344         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3345         struct intel_plane *intel_plane;
3346
3347         if (!cstate->base.active)
3348                 return;
3349
3350         /* Until we know more, just disable transition WMs */
3351         for_each_intel_plane_on_crtc(crtc->dev, intel_crtc, intel_plane) {
3352                 int i = skl_wm_plane_id(intel_plane);
3353
3354                 trans_wm->plane_en[i] = false;
3355         }
3356 }
3357
3358 static void skl_compute_pipe_wm(struct intel_crtc_state *cstate,
3359                                 struct skl_ddb_allocation *ddb,
3360                                 struct skl_pipe_wm *pipe_wm)
3361 {
3362         struct drm_device *dev = cstate->base.crtc->dev;
3363         const struct drm_i915_private *dev_priv = dev->dev_private;
3364         int level, max_level = ilk_wm_max_level(dev);
3365
3366         for (level = 0; level <= max_level; level++) {
3367                 skl_compute_wm_level(dev_priv, ddb, cstate,
3368                                      level, &pipe_wm->wm[level]);
3369         }
3370         pipe_wm->linetime = skl_compute_linetime_wm(cstate);
3371
3372         skl_compute_transition_wm(cstate, &pipe_wm->trans_wm);
3373 }
3374
3375 static void skl_compute_wm_results(struct drm_device *dev,
3376                                    struct skl_pipe_wm *p_wm,
3377                                    struct skl_wm_values *r,
3378                                    struct intel_crtc *intel_crtc)
3379 {
3380         int level, max_level = ilk_wm_max_level(dev);
3381         enum pipe pipe = intel_crtc->pipe;
3382         uint32_t temp;
3383         int i;
3384
3385         for (level = 0; level <= max_level; level++) {
3386                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3387                         temp = 0;
3388
3389                         temp |= p_wm->wm[level].plane_res_l[i] <<
3390                                         PLANE_WM_LINES_SHIFT;
3391                         temp |= p_wm->wm[level].plane_res_b[i];
3392                         if (p_wm->wm[level].plane_en[i])
3393                                 temp |= PLANE_WM_EN;
3394
3395                         r->plane[pipe][i][level] = temp;
3396                 }
3397
3398                 temp = 0;
3399
3400                 temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3401                 temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR];
3402
3403                 if (p_wm->wm[level].plane_en[PLANE_CURSOR])
3404                         temp |= PLANE_WM_EN;
3405
3406                 r->plane[pipe][PLANE_CURSOR][level] = temp;
3407
3408         }
3409
3410         /* transition WMs */
3411         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3412                 temp = 0;
3413                 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
3414                 temp |= p_wm->trans_wm.plane_res_b[i];
3415                 if (p_wm->trans_wm.plane_en[i])
3416                         temp |= PLANE_WM_EN;
3417
3418                 r->plane_trans[pipe][i] = temp;
3419         }
3420
3421         temp = 0;
3422         temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3423         temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR];
3424         if (p_wm->trans_wm.plane_en[PLANE_CURSOR])
3425                 temp |= PLANE_WM_EN;
3426
3427         r->plane_trans[pipe][PLANE_CURSOR] = temp;
3428
3429         r->wm_linetime[pipe] = p_wm->linetime;
3430 }
3431
3432 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
3433                                 i915_reg_t reg,
3434                                 const struct skl_ddb_entry *entry)
3435 {
3436         if (entry->end)
3437                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
3438         else
3439                 I915_WRITE(reg, 0);
3440 }
3441
3442 static void skl_write_wm_values(struct drm_i915_private *dev_priv,
3443                                 const struct skl_wm_values *new)
3444 {
3445         struct drm_device *dev = dev_priv->dev;
3446         struct intel_crtc *crtc;
3447
3448         for_each_intel_crtc(dev, crtc) {
3449                 int i, level, max_level = ilk_wm_max_level(dev);
3450                 enum pipe pipe = crtc->pipe;
3451
3452                 if (!new->dirty[pipe])
3453                         continue;
3454
3455                 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
3456
3457                 for (level = 0; level <= max_level; level++) {
3458                         for (i = 0; i < intel_num_planes(crtc); i++)
3459                                 I915_WRITE(PLANE_WM(pipe, i, level),
3460                                            new->plane[pipe][i][level]);
3461                         I915_WRITE(CUR_WM(pipe, level),
3462                                    new->plane[pipe][PLANE_CURSOR][level]);
3463                 }
3464                 for (i = 0; i < intel_num_planes(crtc); i++)
3465                         I915_WRITE(PLANE_WM_TRANS(pipe, i),
3466                                    new->plane_trans[pipe][i]);
3467                 I915_WRITE(CUR_WM_TRANS(pipe),
3468                            new->plane_trans[pipe][PLANE_CURSOR]);
3469
3470                 for (i = 0; i < intel_num_planes(crtc); i++) {
3471                         skl_ddb_entry_write(dev_priv,
3472                                             PLANE_BUF_CFG(pipe, i),
3473                                             &new->ddb.plane[pipe][i]);
3474                         skl_ddb_entry_write(dev_priv,
3475                                             PLANE_NV12_BUF_CFG(pipe, i),
3476                                             &new->ddb.y_plane[pipe][i]);
3477                 }
3478
3479                 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
3480                                     &new->ddb.plane[pipe][PLANE_CURSOR]);
3481         }
3482 }
3483
3484 /*
3485  * When setting up a new DDB allocation arrangement, we need to correctly
3486  * sequence the times at which the new allocations for the pipes are taken into
3487  * account or we'll have pipes fetching from space previously allocated to
3488  * another pipe.
3489  *
3490  * Roughly the sequence looks like:
3491  *  1. re-allocate the pipe(s) with the allocation being reduced and not
3492  *     overlapping with a previously lit-up pipe (another way to put it is:
3493  *     pipes with their new allocation strictly included into their old ones).
3494  *  2. re-allocate the other pipes that get their allocation reduced
3495  *  3. allocate the pipes having their allocation increased
3496  *
3497  * Steps 1. and 2. are here to take care of the following case:
3498  * - Initially DDB looks like this:
3499  *     |   B    |   C    |
3500  * - enable pipe A.
3501  * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
3502  *   allocation
3503  *     |  A  |  B  |  C  |
3504  *
3505  * We need to sequence the re-allocation: C, B, A (and not B, C, A).
3506  */
3507
3508 static void
3509 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass)
3510 {
3511         int plane;
3512
3513         DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
3514
3515         for_each_plane(dev_priv, pipe, plane) {
3516                 I915_WRITE(PLANE_SURF(pipe, plane),
3517                            I915_READ(PLANE_SURF(pipe, plane)));
3518         }
3519         I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
3520 }
3521
3522 static bool
3523 skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
3524                             const struct skl_ddb_allocation *new,
3525                             enum pipe pipe)
3526 {
3527         uint16_t old_size, new_size;
3528
3529         old_size = skl_ddb_entry_size(&old->pipe[pipe]);
3530         new_size = skl_ddb_entry_size(&new->pipe[pipe]);
3531
3532         return old_size != new_size &&
3533                new->pipe[pipe].start >= old->pipe[pipe].start &&
3534                new->pipe[pipe].end <= old->pipe[pipe].end;
3535 }
3536
3537 static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
3538                                 struct skl_wm_values *new_values)
3539 {
3540         struct drm_device *dev = dev_priv->dev;
3541         struct skl_ddb_allocation *cur_ddb, *new_ddb;
3542         bool reallocated[I915_MAX_PIPES] = {};
3543         struct intel_crtc *crtc;
3544         enum pipe pipe;
3545
3546         new_ddb = &new_values->ddb;
3547         cur_ddb = &dev_priv->wm.skl_hw.ddb;
3548
3549         /*
3550          * First pass: flush the pipes with the new allocation contained into
3551          * the old space.
3552          *
3553          * We'll wait for the vblank on those pipes to ensure we can safely
3554          * re-allocate the freed space without this pipe fetching from it.
3555          */
3556         for_each_intel_crtc(dev, crtc) {
3557                 if (!crtc->active)
3558                         continue;
3559
3560                 pipe = crtc->pipe;
3561
3562                 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
3563                         continue;
3564
3565                 skl_wm_flush_pipe(dev_priv, pipe, 1);
3566                 intel_wait_for_vblank(dev, pipe);
3567
3568                 reallocated[pipe] = true;
3569         }
3570
3571
3572         /*
3573          * Second pass: flush the pipes that are having their allocation
3574          * reduced, but overlapping with a previous allocation.
3575          *
3576          * Here as well we need to wait for the vblank to make sure the freed
3577          * space is not used anymore.
3578          */
3579         for_each_intel_crtc(dev, crtc) {
3580                 if (!crtc->active)
3581                         continue;
3582
3583                 pipe = crtc->pipe;
3584
3585                 if (reallocated[pipe])
3586                         continue;
3587
3588                 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3589                     skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3590                         skl_wm_flush_pipe(dev_priv, pipe, 2);
3591                         intel_wait_for_vblank(dev, pipe);
3592                         reallocated[pipe] = true;
3593                 }
3594         }
3595
3596         /*
3597          * Third pass: flush the pipes that got more space allocated.
3598          *
3599          * We don't need to actively wait for the update here, next vblank
3600          * will just get more DDB space with the correct WM values.
3601          */
3602         for_each_intel_crtc(dev, crtc) {
3603                 if (!crtc->active)
3604                         continue;
3605
3606                 pipe = crtc->pipe;
3607
3608                 /*
3609                  * At this point, only the pipes that got more space than before are
3610                  * left to re-allocate.
3611                  */
3612                 if (reallocated[pipe])
3613                         continue;
3614
3615                 skl_wm_flush_pipe(dev_priv, pipe, 3);
3616         }
3617 }
3618
3619 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3620                                struct skl_ddb_allocation *ddb, /* out */
3621                                struct skl_pipe_wm *pipe_wm /* out */)
3622 {
3623         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3624         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3625
3626         skl_allocate_pipe_ddb(cstate, ddb);
3627         skl_compute_pipe_wm(cstate, ddb, pipe_wm);
3628
3629         if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm)))
3630                 return false;
3631
3632         intel_crtc->wm.active.skl = *pipe_wm;
3633
3634         return true;
3635 }
3636
3637 static void skl_update_other_pipe_wm(struct drm_device *dev,
3638                                      struct drm_crtc *crtc,
3639                                      struct skl_wm_values *r)
3640 {
3641         struct intel_crtc *intel_crtc;
3642         struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3643
3644         /*
3645          * If the WM update hasn't changed the allocation for this_crtc (the
3646          * crtc we are currently computing the new WM values for), other
3647          * enabled crtcs will keep the same allocation and we don't need to
3648          * recompute anything for them.
3649          */
3650         if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3651                 return;
3652
3653         /*
3654          * Otherwise, because of this_crtc being freshly enabled/disabled, the
3655          * other active pipes need new DDB allocation and WM values.
3656          */
3657         for_each_intel_crtc(dev, intel_crtc) {
3658                 struct skl_pipe_wm pipe_wm = {};
3659                 bool wm_changed;
3660
3661                 if (this_crtc->pipe == intel_crtc->pipe)
3662                         continue;
3663
3664                 if (!intel_crtc->active)
3665                         continue;
3666
3667                 wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3668                                                 &r->ddb, &pipe_wm);
3669
3670                 /*
3671                  * If we end up re-computing the other pipe WM values, it's
3672                  * because it was really needed, so we expect the WM values to
3673                  * be different.
3674                  */
3675                 WARN_ON(!wm_changed);
3676
3677                 skl_compute_wm_results(dev, &pipe_wm, r, intel_crtc);
3678                 r->dirty[intel_crtc->pipe] = true;
3679         }
3680 }
3681
3682 static void skl_clear_wm(struct skl_wm_values *watermarks, enum pipe pipe)
3683 {
3684         watermarks->wm_linetime[pipe] = 0;
3685         memset(watermarks->plane[pipe], 0,
3686                sizeof(uint32_t) * 8 * I915_MAX_PLANES);
3687         memset(watermarks->plane_trans[pipe],
3688                0, sizeof(uint32_t) * I915_MAX_PLANES);
3689         watermarks->plane_trans[pipe][PLANE_CURSOR] = 0;
3690
3691         /* Clear ddb entries for pipe */
3692         memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry));
3693         memset(&watermarks->ddb.plane[pipe], 0,
3694                sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3695         memset(&watermarks->ddb.y_plane[pipe], 0,
3696                sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3697         memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0,
3698                sizeof(struct skl_ddb_entry));
3699
3700 }
3701
3702 static void skl_update_wm(struct drm_crtc *crtc)
3703 {
3704         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3705         struct drm_device *dev = crtc->dev;
3706         struct drm_i915_private *dev_priv = dev->dev_private;
3707         struct skl_wm_values *results = &dev_priv->wm.skl_results;
3708         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3709         struct skl_pipe_wm *pipe_wm = &cstate->wm.optimal.skl;
3710
3711
3712         /* Clear all dirty flags */
3713         memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES);
3714
3715         skl_clear_wm(results, intel_crtc->pipe);
3716
3717         if (!skl_update_pipe_wm(crtc, &results->ddb, pipe_wm))
3718                 return;
3719
3720         skl_compute_wm_results(dev, pipe_wm, results, intel_crtc);
3721         results->dirty[intel_crtc->pipe] = true;
3722
3723         skl_update_other_pipe_wm(dev, crtc, results);
3724         skl_write_wm_values(dev_priv, results);
3725         skl_flush_wm_values(dev_priv, results);
3726
3727         /* store the new configuration */
3728         dev_priv->wm.skl_hw = *results;
3729 }
3730
3731 static void ilk_compute_wm_config(struct drm_device *dev,
3732                                   struct intel_wm_config *config)
3733 {
3734         struct intel_crtc *crtc;
3735
3736         /* Compute the currently _active_ config */
3737         for_each_intel_crtc(dev, crtc) {
3738                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
3739
3740                 if (!wm->pipe_enabled)
3741                         continue;
3742
3743                 config->sprites_enabled |= wm->sprites_enabled;
3744                 config->sprites_scaled |= wm->sprites_scaled;
3745                 config->num_pipes_active++;
3746         }
3747 }
3748
3749 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
3750 {
3751         struct drm_device *dev = dev_priv->dev;
3752         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
3753         struct ilk_wm_maximums max;
3754         struct intel_wm_config config = {};
3755         struct ilk_wm_values results = {};
3756         enum intel_ddb_partitioning partitioning;
3757
3758         ilk_compute_wm_config(dev, &config);
3759
3760         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3761         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3762
3763         /* 5/6 split only in single pipe config on IVB+ */
3764         if (INTEL_INFO(dev)->gen >= 7 &&
3765             config.num_pipes_active == 1 && config.sprites_enabled) {
3766                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3767                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3768
3769                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3770         } else {
3771                 best_lp_wm = &lp_wm_1_2;
3772         }
3773
3774         partitioning = (best_lp_wm == &lp_wm_1_2) ?
3775                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3776
3777         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3778
3779         ilk_write_wm_values(dev_priv, &results);
3780 }
3781
3782 static void ilk_initial_watermarks(struct intel_crtc_state *cstate)
3783 {
3784         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
3785         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3786
3787         mutex_lock(&dev_priv->wm.wm_mutex);
3788         intel_crtc->wm.active.ilk = cstate->wm.intermediate;
3789         ilk_program_watermarks(dev_priv);
3790         mutex_unlock(&dev_priv->wm.wm_mutex);
3791 }
3792
3793 static void ilk_optimize_watermarks(struct intel_crtc_state *cstate)
3794 {
3795         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
3796         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3797
3798         mutex_lock(&dev_priv->wm.wm_mutex);
3799         if (cstate->wm.need_postvbl_update) {
3800                 intel_crtc->wm.active.ilk = cstate->wm.optimal.ilk;
3801                 ilk_program_watermarks(dev_priv);
3802         }
3803         mutex_unlock(&dev_priv->wm.wm_mutex);
3804 }
3805
3806 static void skl_pipe_wm_active_state(uint32_t val,
3807                                      struct skl_pipe_wm *active,
3808                                      bool is_transwm,
3809                                      bool is_cursor,
3810                                      int i,
3811                                      int level)
3812 {
3813         bool is_enabled = (val & PLANE_WM_EN) != 0;
3814
3815         if (!is_transwm) {
3816                 if (!is_cursor) {
3817                         active->wm[level].plane_en[i] = is_enabled;
3818                         active->wm[level].plane_res_b[i] =
3819                                         val & PLANE_WM_BLOCKS_MASK;
3820                         active->wm[level].plane_res_l[i] =
3821                                         (val >> PLANE_WM_LINES_SHIFT) &
3822                                                 PLANE_WM_LINES_MASK;
3823                 } else {
3824                         active->wm[level].plane_en[PLANE_CURSOR] = is_enabled;
3825                         active->wm[level].plane_res_b[PLANE_CURSOR] =
3826                                         val & PLANE_WM_BLOCKS_MASK;
3827                         active->wm[level].plane_res_l[PLANE_CURSOR] =
3828                                         (val >> PLANE_WM_LINES_SHIFT) &
3829                                                 PLANE_WM_LINES_MASK;
3830                 }
3831         } else {
3832                 if (!is_cursor) {
3833                         active->trans_wm.plane_en[i] = is_enabled;
3834                         active->trans_wm.plane_res_b[i] =
3835                                         val & PLANE_WM_BLOCKS_MASK;
3836                         active->trans_wm.plane_res_l[i] =
3837                                         (val >> PLANE_WM_LINES_SHIFT) &
3838                                                 PLANE_WM_LINES_MASK;
3839                 } else {
3840                         active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled;
3841                         active->trans_wm.plane_res_b[PLANE_CURSOR] =
3842                                         val & PLANE_WM_BLOCKS_MASK;
3843                         active->trans_wm.plane_res_l[PLANE_CURSOR] =
3844                                         (val >> PLANE_WM_LINES_SHIFT) &
3845                                                 PLANE_WM_LINES_MASK;
3846                 }
3847         }
3848 }
3849
3850 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3851 {
3852         struct drm_device *dev = crtc->dev;
3853         struct drm_i915_private *dev_priv = dev->dev_private;
3854         struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3855         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3856         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3857         struct skl_pipe_wm *active = &cstate->wm.optimal.skl;
3858         enum pipe pipe = intel_crtc->pipe;
3859         int level, i, max_level;
3860         uint32_t temp;
3861
3862         max_level = ilk_wm_max_level(dev);
3863
3864         hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3865
3866         for (level = 0; level <= max_level; level++) {
3867                 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3868                         hw->plane[pipe][i][level] =
3869                                         I915_READ(PLANE_WM(pipe, i, level));
3870                 hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level));
3871         }
3872
3873         for (i = 0; i < intel_num_planes(intel_crtc); i++)
3874                 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3875         hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe));
3876
3877         if (!intel_crtc->active)
3878                 return;
3879
3880         hw->dirty[pipe] = true;
3881
3882         active->linetime = hw->wm_linetime[pipe];
3883
3884         for (level = 0; level <= max_level; level++) {
3885                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3886                         temp = hw->plane[pipe][i][level];
3887                         skl_pipe_wm_active_state(temp, active, false,
3888                                                 false, i, level);
3889                 }
3890                 temp = hw->plane[pipe][PLANE_CURSOR][level];
3891                 skl_pipe_wm_active_state(temp, active, false, true, i, level);
3892         }
3893
3894         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3895                 temp = hw->plane_trans[pipe][i];
3896                 skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3897         }
3898
3899         temp = hw->plane_trans[pipe][PLANE_CURSOR];
3900         skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3901
3902         intel_crtc->wm.active.skl = *active;
3903 }
3904
3905 void skl_wm_get_hw_state(struct drm_device *dev)
3906 {
3907         struct drm_i915_private *dev_priv = dev->dev_private;
3908         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3909         struct drm_crtc *crtc;
3910
3911         skl_ddb_get_hw_state(dev_priv, ddb);
3912         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3913                 skl_pipe_wm_get_hw_state(crtc);
3914 }
3915
3916 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3917 {
3918         struct drm_device *dev = crtc->dev;
3919         struct drm_i915_private *dev_priv = dev->dev_private;
3920         struct ilk_wm_values *hw = &dev_priv->wm.hw;
3921         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3922         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3923         struct intel_pipe_wm *active = &cstate->wm.optimal.ilk;
3924         enum pipe pipe = intel_crtc->pipe;
3925         static const i915_reg_t wm0_pipe_reg[] = {
3926                 [PIPE_A] = WM0_PIPEA_ILK,
3927                 [PIPE_B] = WM0_PIPEB_ILK,
3928                 [PIPE_C] = WM0_PIPEC_IVB,
3929         };
3930
3931         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3932         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3933                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3934
3935         memset(active, 0, sizeof(*active));
3936
3937         active->pipe_enabled = intel_crtc->active;
3938
3939         if (active->pipe_enabled) {
3940                 u32 tmp = hw->wm_pipe[pipe];
3941
3942                 /*
3943                  * For active pipes the LP0 watermark is marked as
3944                  * enabled, and LP1+ watermarks as disabled, since
3945                  * we can't really reverse-compute them when
3946                  * multiple pipes are active.
3947                  */
3948                 active->wm[0].enable = true;
3949                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3950                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3951                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3952                 active->linetime = hw->wm_linetime[pipe];
3953         } else {
3954                 int level, max_level = ilk_wm_max_level(dev);
3955
3956                 /*
3957                  * For inactive pipes, all watermark levels
3958                  * should be marked as enabled but zeroed,
3959                  * which is what we'd compute for them anyway.
3960                  */
3961                 for (level = 0; level <= max_level; level++)
3962                         active->wm[level].enable = true;
3963         }
3964
3965         intel_crtc->wm.active.ilk = *active;
3966 }
3967
3968 #define _FW_WM(value, plane) \
3969         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
3970 #define _FW_WM_VLV(value, plane) \
3971         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
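
/*
 * Example expansion: _FW_WM(tmp, CURSORB) becomes
 * ((tmp & DSPFW_CURSORB_MASK) >> DSPFW_CURSORB_SHIFT) via token
 * pasting of the plane name into the mask/shift register macros.
 */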
3972
3973 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
3974                                struct vlv_wm_values *wm)
3975 {
3976         enum pipe pipe;
3977         uint32_t tmp;
3978
3979         for_each_pipe(dev_priv, pipe) {
3980                 tmp = I915_READ(VLV_DDL(pipe));
3981
3982                 wm->ddl[pipe].primary =
3983                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3984                 wm->ddl[pipe].cursor =
3985                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3986                 wm->ddl[pipe].sprite[0] =
3987                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3988                 wm->ddl[pipe].sprite[1] =
3989                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3990         }
3991
3992         tmp = I915_READ(DSPFW1);
3993         wm->sr.plane = _FW_WM(tmp, SR);
3994         wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB);
3995         wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB);
3996         wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA);
3997
3998         tmp = I915_READ(DSPFW2);
3999         wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB);
4000         wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA);
4001         wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA);
4002
4003         tmp = I915_READ(DSPFW3);
4004         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
4005
4006         if (IS_CHERRYVIEW(dev_priv)) {
4007                 tmp = I915_READ(DSPFW7_CHV);
4008                 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
4009                 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
4010
4011                 tmp = I915_READ(DSPFW8_CHV);
4012                 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF);
4013                 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE);
4014
4015                 tmp = I915_READ(DSPFW9_CHV);
4016                 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC);
4017                 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC);
4018
4019                 tmp = I915_READ(DSPHOWM);
4020                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4021                 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
4022                 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
4023                 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8;
4024                 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4025                 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4026                 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
4027                 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4028                 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4029                 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
4030         } else {
4031                 tmp = I915_READ(DSPFW7);
4032                 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
4033                 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
4034
4035                 tmp = I915_READ(DSPHOWM);
4036                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4037                 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4038                 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4039                 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
4040                 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4041                 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4042                 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
4043         }
4044 }
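/*
 * Illustrative note on the DSPHOWM handling above: each watermark gets
 * one extra high bit there, so e.g. a PLANEA field of 0xff combined
 * with PLANEA_HI = 1 yields 0xff | (1 << 8) = 0x1ff, while the SR
 * field, being 9 bits wide, takes its extra bit at position 9.
 */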
4045
4046 #undef _FW_WM
4047 #undef _FW_WM_VLV
4048
4049 void vlv_wm_get_hw_state(struct drm_device *dev)
4050 {
4051         struct drm_i915_private *dev_priv = to_i915(dev);
4052         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
4053         struct intel_plane *plane;
4054         enum pipe pipe;
4055         u32 val;
4056
4057         vlv_read_wm_values(dev_priv, wm);
4058
4059         for_each_intel_plane(dev, plane) {
4060                 switch (plane->base.type) {
4061                         int sprite;
4062                 case DRM_PLANE_TYPE_CURSOR:
4063                         plane->wm.fifo_size = 63;
4064                         break;
4065                 case DRM_PLANE_TYPE_PRIMARY:
4066                         plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0);
4067                         break;
4068                 case DRM_PLANE_TYPE_OVERLAY:
4069                         sprite = plane->plane;
4070                         plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1);
4071                         break;
4072                 }
4073         }
4074
4075         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
4076         wm->level = VLV_WM_LEVEL_PM2;
4077
4078         if (IS_CHERRYVIEW(dev_priv)) {
4079                 mutex_lock(&dev_priv->rps.hw_lock);
4080
4081                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
4082                 if (val & DSP_MAXFIFO_PM5_ENABLE)
4083                         wm->level = VLV_WM_LEVEL_PM5;
4084
4085                 /*
4086                  * If DDR DVFS is disabled in the BIOS, Punit
4087                  * will never ack the request. So if that happens
4088                  * assume we don't have to enable/disable DDR DVFS
4089                  * dynamically. To test that just set the REQ_ACK
4090                  * bit to poke the Punit, but don't change the
4091                  * HIGH/LOW bits so that we don't actually change
4092                  * the current state.
4093                  */
4094                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4095                 val |= FORCE_DDR_FREQ_REQ_ACK;
4096                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
4097
4098                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
4099                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
4100                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
4101                                       "assuming DDR DVFS is disabled\n");
4102                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
4103                 } else {
4104                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4105                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
4106                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
4107                 }
4108
4109                 mutex_unlock(&dev_priv->rps.hw_lock);
4110         }
4111
4112         for_each_pipe(dev_priv, pipe)
4113                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
4114                               pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor,
4115                               wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]);
4116
4117         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
4118                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
4119 }
4120
4121 void ilk_wm_get_hw_state(struct drm_device *dev)
4122 {
4123         struct drm_i915_private *dev_priv = dev->dev_private;
4124         struct ilk_wm_values *hw = &dev_priv->wm.hw;
4125         struct drm_crtc *crtc;
4126
4127         for_each_crtc(dev, crtc)
4128                 ilk_pipe_wm_get_hw_state(crtc);
4129
4130         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
4131         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
4132         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
4133
4134         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
4135         if (INTEL_INFO(dev)->gen >= 7) {
4136                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
4137                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
4138         }
4139
4140         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4141                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
4142                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4143         else if (IS_IVYBRIDGE(dev))
4144                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
4145                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4146
4147         hw->enable_fbc_wm =
4148                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
4149 }
4150
4151 /**
4152  * intel_update_watermarks - update FIFO watermark values based on current modes
4153  *
4154  * Calculate watermark values for the various WM regs based on current mode
4155  * and plane configuration.
4156  *
4157  * There are several cases to deal with here:
4158  *   - normal (i.e. non-self-refresh)
4159  *   - self-refresh (SR) mode
4160  *   - lines are large relative to FIFO size (buffer can hold up to 2)
4161  *   - lines are small relative to FIFO size (buffer can hold more than 2
4162  *     lines), so need to account for TLB latency
4163  *
4164  *   The normal calculation is:
4165  *     watermark = dotclock * bytes per pixel * latency
4166  *   where latency is platform & configuration dependent (we assume pessimal
4167  *   values here).
4168  *
4169  *   The SR calculation is:
4170  *     watermark = (trunc(latency/line time)+1) * surface width *
4171  *       bytes per pixel
4172  *   where
4173  *     line time = htotal / dotclock
4174  *     surface width = hdisplay for normal plane and 64 for cursor
4175  *   and latency is assumed to be high, as above.
4176  *
4177  * The final value programmed to the register should always be rounded up,
4178  * and include an extra 2 entries to account for clock crossings.
4179  *
4180  * We don't use the sprite, so we can ignore that.  And on Crestline we have
4181  * to set the non-SR watermarks to 8.
4182  */
4183 void intel_update_watermarks(struct drm_crtc *crtc)
4184 {
4185         struct drm_i915_private *dev_priv = crtc->dev->dev_private;
4186
4187         if (dev_priv->display.update_wm)
4188                 dev_priv->display.update_wm(crtc);
4189 }
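/*
 * Worked example of the SR formula documented above, with hypothetical
 * numbers rather than any real platform's: for a 148.5 MHz dotclock,
 * htotal = 2200 and a 4 bpp plane at hdisplay = 1920,
 *
 *   line time = 2200 / 148500000 s ~= 14.8 us
 *
 * so an assumed latency of 30 us gives
 *
 *   watermark = (trunc(30 / 14.8) + 1) * 1920 * 4 = 3 * 7680 = 23040
 *
 * before the rounding up and the extra 2 entries for clock crossings
 * mentioned above.
 */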
4190
4191 /*
4192  * Lock protecting IPS related data structures
4193  */
4194 DEFINE_SPINLOCK(mchdev_lock);
4195
4196 /* Global for IPS driver to get at the current i915 device. Protected by
4197  * mchdev_lock. */
4198 static struct drm_i915_private *i915_mch_dev;
4199
4200 bool ironlake_set_drps(struct drm_device *dev, u8 val)
4201 {
4202         struct drm_i915_private *dev_priv = dev->dev_private;
4203         u16 rgvswctl;
4204
4205         assert_spin_locked(&mchdev_lock);
4206
4207         rgvswctl = I915_READ16(MEMSWCTL);
4208         if (rgvswctl & MEMCTL_CMD_STS) {
4209                 DRM_DEBUG("gpu busy, RCS change rejected\n");
4210                 return false; /* still busy with another command */
4211         }
4212
4213         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
4214                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
4215         I915_WRITE16(MEMSWCTL, rgvswctl);
4216         POSTING_READ16(MEMSWCTL);
4217
4218         rgvswctl |= MEMCTL_CMD_STS;
4219         I915_WRITE16(MEMSWCTL, rgvswctl);
4220
4221         return true;
4222 }
4223
4224 static void ironlake_enable_drps(struct drm_device *dev)
4225 {
4226         struct drm_i915_private *dev_priv = dev->dev_private;
4227         u32 rgvmodectl;
4228         u8 fmax, fmin, fstart, vstart;
4229
4230         spin_lock_irq(&mchdev_lock);
4231
4232         rgvmodectl = I915_READ(MEMMODECTL);
4233
4234         /* Enable temp reporting */
4235         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
4236         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
4237
4238         /* 100ms RC evaluation intervals */
4239         I915_WRITE(RCUPEI, 100000);
4240         I915_WRITE(RCDNEI, 100000);
4241
4242         /* Set max/min thresholds to 90ms and 80ms respectively */
4243         I915_WRITE(RCBMAXAVG, 90000);
4244         I915_WRITE(RCBMINAVG, 80000);
4245
4246         I915_WRITE(MEMIHYST, 1);
4247
4248         /* Set up min, max, and cur for interrupt handling */
4249         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
4250         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
4251         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
4252                 MEMMODE_FSTART_SHIFT;
4253
4254         vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
4255                 PXVFREQ_PX_SHIFT;
4256
4257         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
4258         dev_priv->ips.fstart = fstart;
4259
4260         dev_priv->ips.max_delay = fstart;
4261         dev_priv->ips.min_delay = fmin;
4262         dev_priv->ips.cur_delay = fstart;
4263
4264         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
4265                          fmax, fmin, fstart);
4266
4267         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
4268
4269         /*
4270          * Interrupts will be enabled in ironlake_irq_postinstall
4271          */
4272
4273         I915_WRITE(VIDSTART, vstart);
4274         POSTING_READ(VIDSTART);
4275
4276         rgvmodectl |= MEMMODE_SWMODE_EN;
4277         I915_WRITE(MEMMODECTL, rgvmodectl);
4278
4279         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
4280                 DRM_ERROR("stuck trying to change perf mode\n");
4281         mdelay(1);
4282
4283         ironlake_set_drps(dev, fstart);
4284
4285         dev_priv->ips.last_count1 = I915_READ(DMIEC) +
4286                 I915_READ(DDREC) + I915_READ(CSIEC);
4287         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
4288         dev_priv->ips.last_count2 = I915_READ(GFXEC);
4289         dev_priv->ips.last_time2 = ktime_get_raw_ns();
4290
4291         spin_unlock_irq(&mchdev_lock);
4292 }
4293
4294 static void ironlake_disable_drps(struct drm_device *dev)
4295 {
4296         struct drm_i915_private *dev_priv = dev->dev_private;
4297         u16 rgvswctl;
4298
4299         spin_lock_irq(&mchdev_lock);
4300
4301         rgvswctl = I915_READ16(MEMSWCTL);
4302
4303         /* Ack interrupts, disable EFC interrupt */
4304         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
4305         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
4306         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
4307         I915_WRITE(DEIIR, DE_PCU_EVENT);
4308         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
4309
4310         /* Go back to the starting frequency */
4311         ironlake_set_drps(dev, dev_priv->ips.fstart);
4312         mdelay(1);
4313         rgvswctl |= MEMCTL_CMD_STS;
4314         I915_WRITE(MEMSWCTL, rgvswctl);
4315         mdelay(1);
4316
4317         spin_unlock_irq(&mchdev_lock);
4318 }
4319
4320 /* There's a funny hw issue where the hw returns all 0 when reading from
4321  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
4322  * ourselves, instead of doing an rmw cycle (which might result in us clearing
4323  * all limits and the gpu getting stuck at its current frequency).
4324  */
4325 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
4326 {
4327         u32 limits;
4328
4329         /* Only set the down limit when we've reached the lowest level to avoid
4330          * getting more interrupts, otherwise leave this clear. This prevents a
4331          * race in the hw when coming out of rc6: There's a tiny window where
4332          * the hw runs at the minimal clock before selecting the desired
4333          * frequency, if the down threshold expires in that window we will not
4334          * receive a down interrupt. */
4335         if (IS_GEN9(dev_priv)) {
4336                 limits = (dev_priv->rps.max_freq_softlimit) << 23;
4337                 if (val <= dev_priv->rps.min_freq_softlimit)
4338                         limits |= (dev_priv->rps.min_freq_softlimit) << 14;
4339         } else {
4340                 limits = dev_priv->rps.max_freq_softlimit << 24;
4341                 if (val <= dev_priv->rps.min_freq_softlimit)
4342                         limits |= dev_priv->rps.min_freq_softlimit << 16;
4343         }
4344
4345         return limits;
4346 }
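/*
 * Illustrative layout of the value computed above: pre-gen9 the max
 * softlimit lands in bits 31:24 and, only once we sit at the floor,
 * the min softlimit in bits 23:16. E.g. max = 0x16 with min = 0x05 at
 * the floor packs to (0x16 << 24) | (0x05 << 16) = 0x16050000; gen9
 * uses shifts of 23 and 14 instead.
 */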
4347
4348 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
4349 {
4350         int new_power;
4351         u32 threshold_up = 0, threshold_down = 0; /* in % */
4352         u32 ei_up = 0, ei_down = 0;
4353
4354         new_power = dev_priv->rps.power;
4355         switch (dev_priv->rps.power) {
4356         case LOW_POWER:
4357                 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
4358                         new_power = BETWEEN;
4359                 break;
4360
4361         case BETWEEN:
4362                 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
4363                         new_power = LOW_POWER;
4364                 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
4365                         new_power = HIGH_POWER;
4366                 break;
4367
4368         case HIGH_POWER:
4369                 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
4370                         new_power = BETWEEN;
4371                 break;
4372         }
4373         /* Max/min bins are special */
4374         if (val <= dev_priv->rps.min_freq_softlimit)
4375                 new_power = LOW_POWER;
4376         if (val >= dev_priv->rps.max_freq_softlimit)
4377                 new_power = HIGH_POWER;
4378         if (new_power == dev_priv->rps.power)
4379                 return;
4380
4381         /* Note the units here are not exactly 1us, but 1280ns. */
4382         switch (new_power) {
4383         case LOW_POWER:
4384                 /* Upclock if more than 95% busy over 16ms */
4385                 ei_up = 16000;
4386                 threshold_up = 95;
4387
4388                 /* Downclock if less than 85% busy over 32ms */
4389                 ei_down = 32000;
4390                 threshold_down = 85;
4391                 break;
4392
4393         case BETWEEN:
4394                 /* Upclock if more than 90% busy over 13ms */
4395                 ei_up = 13000;
4396                 threshold_up = 90;
4397
4398                 /* Downclock if less than 75% busy over 32ms */
4399                 ei_down = 32000;
4400                 threshold_down = 75;
4401                 break;
4402
4403         case HIGH_POWER:
4404                 /* Upclock if more than 85% busy over 10ms */
4405                 ei_up = 10000;
4406                 threshold_up = 85;
4407
4408                 /* Downclock if less than 60% busy over 32ms */
4409                 ei_down = 32000;
4410                 threshold_down = 60;
4411                 break;
4412         }
4413
4414         I915_WRITE(GEN6_RP_UP_EI,
4415                 GT_INTERVAL_FROM_US(dev_priv, ei_up));
4416         I915_WRITE(GEN6_RP_UP_THRESHOLD,
4417                 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100)));
4418
4419         I915_WRITE(GEN6_RP_DOWN_EI,
4420                 GT_INTERVAL_FROM_US(dev_priv, ei_down));
4421         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
4422                 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100)));
4423
4424          I915_WRITE(GEN6_RP_CONTROL,
4425                     GEN6_RP_MEDIA_TURBO |
4426                     GEN6_RP_MEDIA_HW_NORMAL_MODE |
4427                     GEN6_RP_MEDIA_IS_GFX |
4428                     GEN6_RP_ENABLE |
4429                     GEN6_RP_UP_BUSY_AVG |
4430                     GEN6_RP_DOWN_IDLE_AVG);
4431
4432         dev_priv->rps.power = new_power;
4433         dev_priv->rps.up_threshold = threshold_up;
4434         dev_priv->rps.down_threshold = threshold_down;
4435         dev_priv->rps.last_adj = 0;
4436 }
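/*
 * Worked example of the 1280ns units noted above, taking the LOW_POWER
 * settings: ei_up = 16000 us is roughly 16000000 ns / 1280 ns = 12500
 * hardware units via GT_INTERVAL_FROM_US(), and the 95% up threshold
 * then corresponds to about 16000 * 95 / 100 = 15200 us of busy time.
 */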
4437
4438 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4439 {
4440         u32 mask = 0;
4441
4442         if (val > dev_priv->rps.min_freq_softlimit)
4443                 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4444         if (val < dev_priv->rps.max_freq_softlimit)
4445                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4446
4447         mask &= dev_priv->pm_rps_events;
4448
4449         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
4450 }
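/*
 * Illustrative: at an intermediate frequency both up and down events
 * stay unmasked; at min_freq_softlimit only the up events remain and
 * at max_freq_softlimit only the down events, so the hardware stops
 * interrupting us about transitions we can't take anyway.
 */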
4451
4452 /* gen6_set_rps is called to update the frequency request, but should also be
4453  * called when the range (min_delay and max_delay) is modified so that we can
4454  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
4455 static void gen6_set_rps(struct drm_device *dev, u8 val)
4456 {
4457         struct drm_i915_private *dev_priv = dev->dev_private;
4458
4459         /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4460         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
4461                 return;
4462
4463         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4464         WARN_ON(val > dev_priv->rps.max_freq);
4465         WARN_ON(val < dev_priv->rps.min_freq);
4466
4467         /* min/max delay may still have been modified so be sure to
4468          * write the limits value.
4469          */
4470         if (val != dev_priv->rps.cur_freq) {
4471                 gen6_set_rps_thresholds(dev_priv, val);
4472
4473                 if (IS_GEN9(dev))
4474                         I915_WRITE(GEN6_RPNSWREQ,
4475                                    GEN9_FREQUENCY(val));
4476                 else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4477                         I915_WRITE(GEN6_RPNSWREQ,
4478                                    HSW_FREQUENCY(val));
4479                 else
4480                         I915_WRITE(GEN6_RPNSWREQ,
4481                                    GEN6_FREQUENCY(val) |
4482                                    GEN6_OFFSET(0) |
4483                                    GEN6_AGGRESSIVE_TURBO);
4484         }
4485
4486         /* Make sure we continue to get interrupts
4487          * until we hit the minimum or maximum frequencies.
4488          */
4489         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
4490         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4491
4492         POSTING_READ(GEN6_RPNSWREQ);
4493
4494         dev_priv->rps.cur_freq = val;
4495         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4496 }
4497
4498 static void valleyview_set_rps(struct drm_device *dev, u8 val)
4499 {
4500         struct drm_i915_private *dev_priv = dev->dev_private;
4501
4502         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4503         WARN_ON(val > dev_priv->rps.max_freq);
4504         WARN_ON(val < dev_priv->rps.min_freq);
4505
4506         if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
4507                       "Odd GPU freq value\n"))
4508                 val &= ~1;
4509
4510         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4511
4512         if (val != dev_priv->rps.cur_freq) {
4513                 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
4514                 if (!IS_CHERRYVIEW(dev_priv))
4515                         gen6_set_rps_thresholds(dev_priv, val);
4516         }
4517
4518         dev_priv->rps.cur_freq = val;
4519         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4520 }
4521
4522 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
4523  *
4524  * If Gfx is idle, then:
4525  * 1. Forcewake the Media well.
4526  * 2. Request the idle frequency.
4527  * 3. Release forcewake of the Media well.
4528  */
4529 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
4530 {
4531         u32 val = dev_priv->rps.idle_freq;
4532
4533         if (dev_priv->rps.cur_freq <= val)
4534                 return;
4535
4536         /* Wake up the media well, as that takes a lot less
4537          * power than the Render well. */
4538         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
4539         valleyview_set_rps(dev_priv->dev, val);
4540         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
4541 }
4542
4543 void gen6_rps_busy(struct drm_i915_private *dev_priv)
4544 {
4545         mutex_lock(&dev_priv->rps.hw_lock);
4546         if (dev_priv->rps.enabled) {
4547                 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
4548                         gen6_rps_reset_ei(dev_priv);
4549                 I915_WRITE(GEN6_PMINTRMSK,
4550                            gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
4551         }
4552         mutex_unlock(&dev_priv->rps.hw_lock);
4553 }
4554
4555 void gen6_rps_idle(struct drm_i915_private *dev_priv)
4556 {
4557         struct drm_device *dev = dev_priv->dev;
4558
4559         mutex_lock(&dev_priv->rps.hw_lock);
4560         if (dev_priv->rps.enabled) {
4561                 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
4562                         vlv_set_rps_idle(dev_priv);
4563                 else
4564                         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4565                 dev_priv->rps.last_adj = 0;
4566                 I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
4567         }
4568         mutex_unlock(&dev_priv->rps.hw_lock);
4569
4570         spin_lock(&dev_priv->rps.client_lock);
4571         while (!list_empty(&dev_priv->rps.clients))
4572                 list_del_init(dev_priv->rps.clients.next);
4573         spin_unlock(&dev_priv->rps.client_lock);
4574 }
4575
4576 void gen6_rps_boost(struct drm_i915_private *dev_priv,
4577                     struct intel_rps_client *rps,
4578                     unsigned long submitted)
4579 {
4580         /* This is intentionally racy! We peek at the state here, then
4581          * validate inside the RPS worker.
4582          */
4583         if (!(dev_priv->mm.busy &&
4584               dev_priv->rps.enabled &&
4585               dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
4586                 return;
4587
4588         /* Force an RPS boost (and don't count it against the client) if
4589          * the GPU is severely congested.
4590          */
4591         if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
4592                 rps = NULL;
4593
4594         spin_lock(&dev_priv->rps.client_lock);
4595         if (rps == NULL || list_empty(&rps->link)) {
4596                 spin_lock_irq(&dev_priv->irq_lock);
4597                 if (dev_priv->rps.interrupts_enabled) {
4598                         dev_priv->rps.client_boost = true;
4599                         queue_work(dev_priv->wq, &dev_priv->rps.work);
4600                 }
4601                 spin_unlock_irq(&dev_priv->irq_lock);
4602
4603                 if (rps != NULL) {
4604                         list_add(&rps->link, &dev_priv->rps.clients);
4605                         rps->boosts++;
4606                 } else
4607                         dev_priv->rps.boosts++;
4608         }
4609         spin_unlock(&dev_priv->rps.client_lock);
4610 }
4611
4612 void intel_set_rps(struct drm_device *dev, u8 val)
4613 {
4614         if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
4615                 valleyview_set_rps(dev, val);
4616         else
4617                 gen6_set_rps(dev, val);
4618 }
4619
4620 static void gen9_disable_rc6(struct drm_device *dev)
4621 {
4622         struct drm_i915_private *dev_priv = dev->dev_private;
4623
4624         I915_WRITE(GEN6_RC_CONTROL, 0);
4625         I915_WRITE(GEN9_PG_ENABLE, 0);
4626 }
4627
4628 static void gen9_disable_rps(struct drm_device *dev)
4629 {
4630         struct drm_i915_private *dev_priv = dev->dev_private;
4631
4632         I915_WRITE(GEN6_RP_CONTROL, 0);
4633 }
4634
4635 static void gen6_disable_rps(struct drm_device *dev)
4636 {
4637         struct drm_i915_private *dev_priv = dev->dev_private;
4638
4639         I915_WRITE(GEN6_RC_CONTROL, 0);
4640         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
4641         I915_WRITE(GEN6_RP_CONTROL, 0);
4642 }
4643
4644 static void cherryview_disable_rps(struct drm_device *dev)
4645 {
4646         struct drm_i915_private *dev_priv = dev->dev_private;
4647
4648         I915_WRITE(GEN6_RC_CONTROL, 0);
4649 }
4650
4651 static void valleyview_disable_rps(struct drm_device *dev)
4652 {
4653         struct drm_i915_private *dev_priv = dev->dev_private;
4654
4655         /* We're taking forcewake before disabling RC6,
4656          * which is what the BIOS expects when going into suspend. */
4657         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4658
4659         I915_WRITE(GEN6_RC_CONTROL, 0);
4660
4661         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4662 }
4663
4664 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
4665 {
4666         if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
4667                 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
4668                         mode = GEN6_RC_CTL_RC6_ENABLE;
4669                 else
4670                         mode = 0;
4671         }
4672         if (HAS_RC6p(dev))
4673                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
4674                               onoff(mode & GEN6_RC_CTL_RC6_ENABLE),
4675                               onoff(mode & GEN6_RC_CTL_RC6p_ENABLE),
4676                               onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE));
4677
4678         else
4679                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
4680                               onoff(mode & GEN6_RC_CTL_RC6_ENABLE));
4681 }
4682
4683 static bool bxt_check_bios_rc6_setup(const struct drm_device *dev)
4684 {
4685         struct drm_i915_private *dev_priv = to_i915(dev);
4686         struct i915_ggtt *ggtt = &dev_priv->ggtt;
4687         bool enable_rc6 = true;
4688         unsigned long rc6_ctx_base;
4689
4690         if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
4691                 DRM_DEBUG_KMS("RC6 Base location not set properly.\n");
4692                 enable_rc6 = false;
4693         }
4694
4695         /*
4696          * The exact context size is not known for BXT, so assume a page size
4697          * for this check.
4698          */
4699         rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
4700         if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) &&
4701               (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base +
4702                                         ggtt->stolen_reserved_size))) {
4703                 DRM_DEBUG_KMS("RC6 Base address not as expected.\n");
4704                 enable_rc6 = false;
4705         }
4706
4707         if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
4708               ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
4709               ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
4710               ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
4711                 DRM_DEBUG_KMS("Engine Idle wait time not set properly.\n");
4712                 enable_rc6 = false;
4713         }
4714
4715         if (!(I915_READ(GEN6_RC_CONTROL) & (GEN6_RC_CTL_RC6_ENABLE |
4716                                             GEN6_RC_CTL_HW_ENABLE)) &&
4717             ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) ||
4718              !(I915_READ(GEN6_RC_STATE) & RC6_STATE))) {
4719                 DRM_DEBUG_KMS("HW/SW RC6 is not enabled by BIOS.\n");
4720                 enable_rc6 = false;
4721         }
4722
4723         return enable_rc6;
4724 }
4725
4726 int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
4727 {
4728         /* No RC6 before Ironlake, and the Ironlake (ilk) code is gone. */
4729         if (INTEL_INFO(dev)->gen < 6)
4730                 return 0;
4731
4732         if (!enable_rc6)
4733                 return 0;
4734
4735         if (IS_BROXTON(dev) && !bxt_check_bios_rc6_setup(dev)) {
4736                 DRM_INFO("RC6 disabled by BIOS\n");
4737                 return 0;
4738         }
4739
4740         /* Respect the kernel parameter if it is set */
4741         if (enable_rc6 >= 0) {
4742                 int mask;
4743
4744                 if (HAS_RC6p(dev))
4745                         mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
4746                                INTEL_RC6pp_ENABLE;
4747                 else
4748                         mask = INTEL_RC6_ENABLE;
4749
4750                 if ((enable_rc6 & mask) != enable_rc6)
4751                         DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
4752                                       enable_rc6 & mask, enable_rc6, mask);
4753
4754                 return enable_rc6 & mask;
4755         }
4756
4757         if (IS_IVYBRIDGE(dev))
4758                 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
4759
4760         return INTEL_RC6_ENABLE;
4761 }
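/*
 * Illustrative mapping of the modparam handling above:
 * i915.enable_rc6=1 keeps INTEL_RC6_ENABLE only, =3 keeps
 * RC6 | RC6p (the RC6p bit surviving only where HAS_RC6p()),
 * and a negative value falls through to the per-platform
 * defaults at the end of the function.
 */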
4762
4763 int intel_enable_rc6(const struct drm_device *dev)
4764 {
4765         return i915.enable_rc6;
4766 }
4767
4768 static void gen6_init_rps_frequencies(struct drm_device *dev)
4769 {
4770         struct drm_i915_private *dev_priv = dev->dev_private;
4771         uint32_t rp_state_cap;
4772         u32 ddcc_status = 0;
4773         int ret;
4774
4775         /* All of these values are in units of 50MHz */
4776         dev_priv->rps.cur_freq          = 0;
4777         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
4778         if (IS_BROXTON(dev)) {
4779                 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
4780                 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
4781                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
4782                 dev_priv->rps.min_freq = (rp_state_cap >>  0) & 0xff;
4783         } else {
4784                 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
4785                 dev_priv->rps.rp0_freq = (rp_state_cap >>  0) & 0xff;
4786                 dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
4787                 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
4788         }
4789
4790         /* hw_max = RP0 until we check for overclocking */
4791         dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
4792
4793         dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4794         if (IS_HASWELL(dev) || IS_BROADWELL(dev) ||
4795             IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
4796                 ret = sandybridge_pcode_read(dev_priv,
4797                                         HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4798                                         &ddcc_status);
4799                 if (ret == 0)
4800                         dev_priv->rps.efficient_freq =
4801                                 clamp_t(u8,
4802                                         ((ddcc_status >> 8) & 0xff),
4803                                         dev_priv->rps.min_freq,
4804                                         dev_priv->rps.max_freq);
4805         }
4806
4807         if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
4808                 /* Store the frequency values in 16.66 MHz units, which is
4809                  * the natural hardware unit for SKL. */
4810                 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
4811                 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
4812                 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
4813                 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
4814                 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
4815         }
4816
4817         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
4818
4819         /* Preserve min/max settings in case of re-init */
4820         if (dev_priv->rps.max_freq_softlimit == 0)
4821                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4822
4823         if (dev_priv->rps.min_freq_softlimit == 0) {
4824                 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4825                         dev_priv->rps.min_freq_softlimit =
4826                                 max_t(int, dev_priv->rps.efficient_freq,
4827                                       intel_freq_opcode(dev_priv, 450));
4828                 else
4829                         dev_priv->rps.min_freq_softlimit =
4830                                 dev_priv->rps.min_freq;
4831         }
4832 }
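/*
 * Worked example of the unit handling above, with a hypothetical fuse
 * value: rp0 = 0x16 read from the RP_STATE_CAP register means
 * 22 * 50 MHz = 1100 MHz. On SKL/KBL the value is then rescaled by
 * GEN9_FREQ_SCALER, so the same 1100 MHz is stored as 66 units of
 * 16.66 MHz.
 */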
4833
4834 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4835 static void gen9_enable_rps(struct drm_device *dev)
4836 {
4837         struct drm_i915_private *dev_priv = dev->dev_private;
4838
4839         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4840
4841         gen6_init_rps_frequencies(dev);
4842
4843         /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4844         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
4845                 /*
4846                  * The BIOS could leave HW Turbo enabled, so explicitly
4847                  * clear out the Control register to avoid an inconsistency
4848                  * with the debugfs interface, which would otherwise still
4849                  * show Turbo as enabled, which is not what the user expects
4850                  * after WaGsvDisableTurbo. Apart from that there is no
4851                  * problem even if Turbo is left enabled in the Control
4852                  * register, as the Up/Down interrupts would remain masked.
4853                  */
4854                 gen9_disable_rps(dev);
4855                 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4856                 return;
4857         }
4858
4859         /* Program defaults and thresholds for RPS */
4860         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4861                 GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
4862
4863         /* 1 second timeout */
4864         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
4865                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
4866
4867         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4868
4869         /* Lean on the call to gen6_set_rps below to program/set up the
4870          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
4871          * RP_INTERRUPT_LIMITS & RPNSWREQ registers. */
4872         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4873         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4874
4875         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4876 }
4877
4878 static void gen9_enable_rc6(struct drm_device *dev)
4879 {
4880         struct drm_i915_private *dev_priv = dev->dev_private;
4881         struct intel_engine_cs *engine;
4882         uint32_t rc6_mask = 0;
4883
4884         /* 1a: Software RC state - RC0 */
4885         I915_WRITE(GEN6_RC_STATE, 0);
4886
4887         /* 1b: Get forcewake during program sequence. Although the driver
4888          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4889         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4890
4891         /* 2a: Disable RC states. */
4892         I915_WRITE(GEN6_RC_CONTROL, 0);
4893
4894         /* 2b: Program RC6 thresholds. */
4895
4896         /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
4897         if (IS_SKYLAKE(dev))
4898                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
4899         else
4900                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4901         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4902         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4903         for_each_engine(engine, dev_priv)
4904                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
4905
4906         if (HAS_GUC_UCODE(dev))
4907                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
4908
4909         I915_WRITE(GEN6_RC_SLEEP, 0);
4910
4911         /* 2c: Program Coarse Power Gating Policies. */
4912         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4913         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4914
4915         /* 3a: Enable RC6 */
4916         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4917                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4918         DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE));
4919         /* WaRsUseTimeoutMode */
4920         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
4921             IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
4922                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
4923                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4924                            GEN7_RC_CTL_TO_MODE |
4925                            rc6_mask);
4926         } else {
4927                 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4928                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4929                            GEN6_RC_CTL_EI_MODE(1) |
4930                            rc6_mask);
4931         }
4932
4933         /*
4934          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
4935          * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
4936          */
4937         if (NEEDS_WaRsDisableCoarsePowerGating(dev))
4938                 I915_WRITE(GEN9_PG_ENABLE, 0);
4939         else
4940                 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4941                                 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
4942
4943         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4944
4945 }
4946
4947 static void gen8_enable_rps(struct drm_device *dev)
4948 {
4949         struct drm_i915_private *dev_priv = dev->dev_private;
4950         struct intel_engine_cs *engine;
4951         uint32_t rc6_mask = 0;
4952
4953         /* 1a: Software RC state - RC0 */
4954         I915_WRITE(GEN6_RC_STATE, 0);
4955
4956         /* 1c & 1d: Get forcewake during program sequence. Although the driver
4957          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
4958         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4959
4960         /* 2a: Disable RC states. */
4961         I915_WRITE(GEN6_RC_CONTROL, 0);
4962
4963         /* Initialize rps frequencies */
4964         gen6_init_rps_frequencies(dev);
4965
4966         /* 2b: Program RC6 thresholds. */
4967         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4968         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4969         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4970         for_each_engine(engine, dev_priv)
4971                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
4972         I915_WRITE(GEN6_RC_SLEEP, 0);
4973         if (IS_BROADWELL(dev))
4974                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4975         else
4976                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4977
4978         /* 3: Enable RC6 */
4979         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4980                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4981         intel_print_rc6_info(dev, rc6_mask);
4982         if (IS_BROADWELL(dev))
4983                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4984                                 GEN7_RC_CTL_TO_MODE |
4985                                 rc6_mask);
4986         else
4987                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4988                                 GEN6_RC_CTL_EI_MODE(1) |
4989                                 rc6_mask);
4990
4991         /* 4: Program defaults and thresholds for RPS */
4992         I915_WRITE(GEN6_RPNSWREQ,
4993                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4994         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4995                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4996         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4997         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
4998
4999         /* Docs recommend 900MHz, and 300 MHz respectively */
5000         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
5001                    dev_priv->rps.max_freq_softlimit << 24 |
5002                    dev_priv->rps.min_freq_softlimit << 16);
5003
5004         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
5005         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
5006         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
5007         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
5008
5009         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5010
5011         /* 5: Enable RPS */
5012         I915_WRITE(GEN6_RP_CONTROL,
5013                    GEN6_RP_MEDIA_TURBO |
5014                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5015                    GEN6_RP_MEDIA_IS_GFX |
5016                    GEN6_RP_ENABLE |
5017                    GEN6_RP_UP_BUSY_AVG |
5018                    GEN6_RP_DOWN_IDLE_AVG);
5019
5020         /* 6: Ring frequency + overclocking (our driver does this later) */
5021
5022         dev_priv->rps.power = HIGH_POWER; /* force a reset */
5023         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5024
5025         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5026 }
5027
5028 static void gen6_enable_rps(struct drm_device *dev)
5029 {
5030         struct drm_i915_private *dev_priv = dev->dev_private;
5031         struct intel_engine_cs *engine;
5032         u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
5033         u32 gtfifodbg;
5034         int rc6_mode;
5035         int ret;
5036
5037         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5038
5039         /* Here begins a magic sequence of register writes to enable
5040          * auto-downclocking.
5041          *
5042          * There might be some value in exposing these to
5043          * userspace...
5044          */
5045         I915_WRITE(GEN6_RC_STATE, 0);
5046
5047         /* Clear the DBG now so we don't get confused by earlier errors */
5048         gtfifodbg = I915_READ(GTFIFODBG);
5049         if (gtfifodbg) {
5050                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
5051                 I915_WRITE(GTFIFODBG, gtfifodbg);
5052         }
5053
5054         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5055
5056         /* Initialize rps frequencies */
5057         gen6_init_rps_frequencies(dev);
5058
5059         /* disable the counters and set deterministic thresholds */
5060         I915_WRITE(GEN6_RC_CONTROL, 0);
5061
5062         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
5063         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
5064         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
5065         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5066         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5067
5068         for_each_engine(engine, dev_priv)
5069                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
5070
5071         I915_WRITE(GEN6_RC_SLEEP, 0);
5072         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
5073         if (IS_IVYBRIDGE(dev))
5074                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
5075         else
5076                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
5077         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
5078         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
5079
5080         /* Check if we are enabling RC6 */
5081         rc6_mode = intel_enable_rc6(dev_priv->dev);
5082         if (rc6_mode & INTEL_RC6_ENABLE)
5083                 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
5084
5085         /* We don't use those on Haswell */
5086         if (!IS_HASWELL(dev)) {
5087                 if (rc6_mode & INTEL_RC6p_ENABLE)
5088                         rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
5089
5090                 if (rc6_mode & INTEL_RC6pp_ENABLE)
5091                         rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
5092         }
5093
5094         intel_print_rc6_info(dev, rc6_mask);
5095
5096         I915_WRITE(GEN6_RC_CONTROL,
5097                    rc6_mask |
5098                    GEN6_RC_CTL_EI_MODE(1) |
5099                    GEN6_RC_CTL_HW_ENABLE);
5100
5101         /* Power down if completely idle for over 50ms */
5102         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
5103         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5104
5105         ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
5106         if (ret)
5107                 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
5108
5109         ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
5110         if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
5111                 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
5112                                  (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
5113                                  (pcu_mbox & 0xff) * 50);
5114                 dev_priv->rps.max_freq = pcu_mbox & 0xff;
5115         }
5116
5117         dev_priv->rps.power = HIGH_POWER; /* force a reset */
5118         gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5119
5120         rc6vids = 0;
5121         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
5122         if (IS_GEN6(dev) && ret) {
5123                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
5124         } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
5125                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
5126                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
5127                 rc6vids &= 0xffff00;
5128                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
5129                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
5130                 if (ret)
5131                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
5132         }
5133
5134         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5135 }
5136
5137 static void __gen6_update_ring_freq(struct drm_device *dev)
5138 {
5139         struct drm_i915_private *dev_priv = dev->dev_private;
5140         int min_freq = 15;
5141         unsigned int gpu_freq;
5142         unsigned int max_ia_freq, min_ring_freq;
5143         unsigned int max_gpu_freq, min_gpu_freq;
5144         int scaling_factor = 180;
5145         struct cpufreq_policy *policy;
5146
5147         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5148
5149         policy = cpufreq_cpu_get(0);
5150         if (policy) {
5151                 max_ia_freq = policy->cpuinfo.max_freq;
5152                 cpufreq_cpu_put(policy);
5153         } else {
5154                 /*
5155                  * Default to measured freq if none found, PCU will ensure we
5156                  * don't go over
5157                  */
5158                 max_ia_freq = tsc_khz;
5159         }
5160
5161         /* Convert from kHz to MHz */
5162         max_ia_freq /= 1000;
5163
5164         min_ring_freq = I915_READ(DCLK) & 0xf;
5165         /* convert DDR frequency from units of 266.6MHz to bandwidth */
5166         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
5167
5168         if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
5169                 /* Convert GT frequency to 50 MHz units */
5170                 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
5171                 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
5172         } else {
5173                 min_gpu_freq = dev_priv->rps.min_freq;
5174                 max_gpu_freq = dev_priv->rps.max_freq;
5175         }
5176
5177         /*
5178          * For each potential GPU frequency, load a ring frequency we'd like
5179          * to use for memory access.  We do this by specifying the IA frequency
5180          * the PCU should use as a reference to determine the ring frequency.
5181          */
5182         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
5183                 int diff = max_gpu_freq - gpu_freq;
5184                 unsigned int ia_freq = 0, ring_freq = 0;
5185
5186                 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
5187                         /*
5188                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
5189                          * No floor required for ring frequency on SKL.
5190                          */
5191                         ring_freq = gpu_freq;
5192                 } else if (INTEL_INFO(dev)->gen >= 8) {
5193                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
5194                         ring_freq = max(min_ring_freq, gpu_freq);
5195                 } else if (IS_HASWELL(dev)) {
5196                         ring_freq = mult_frac(gpu_freq, 5, 4);
5197                         ring_freq = max(min_ring_freq, ring_freq);
5198                         /* leave ia_freq as the default, chosen by cpufreq */
5199                 } else {
5200                         /* On older processors, there is no separate ring
5201                          * clock domain, so in order to boost the bandwidth
5202                          * of the ring, we need to upclock the CPU (ia_freq).
5203                          *
5204                          * For GPU frequencies less than 750MHz,
5205                          * just use the lowest ring freq.
5206                          */
5207                         if (gpu_freq < min_freq)
5208                                 ia_freq = 800;
5209                         else
5210                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
5211                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
5212                 }
5213
5214                 sandybridge_pcode_write(dev_priv,
5215                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
5216                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
5217                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
5218                                         gpu_freq);
5219         }
5220 }
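/*
 * Worked example of the legacy (pre-Haswell) branch above, with
 * hypothetical numbers: max_ia_freq = 3400 MHz and a gpu_freq sitting
 * 4 bins below max gives ia_freq = 3400 - (4 * 180) / 2 = 3040 MHz,
 * which DIV_ROUND_CLOSEST(3040, 100) encodes as 30 for the pcode
 * mailbox, i.e. units of 100 MHz.
 */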
5221
5222 void gen6_update_ring_freq(struct drm_device *dev)
5223 {
5224         struct drm_i915_private *dev_priv = dev->dev_private;
5225
5226         if (!HAS_CORE_RING_FREQ(dev))
5227                 return;
5228
5229         mutex_lock(&dev_priv->rps.hw_lock);
5230         __gen6_update_ring_freq(dev);
5231         mutex_unlock(&dev_priv->rps.hw_lock);
5232 }
5233
5234 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5235 {
5236         struct drm_device *dev = dev_priv->dev;
5237         u32 val, rp0;
5238
5239         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5240
5241         switch (INTEL_INFO(dev)->eu_total) {
5242         case 8:
5243                 /* (2 * 4) config */
5244                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
5245                 break;
5246         case 12:
5247                 /* (2 * 6) config */
5248                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
5249                 break;
5250         case 16:
5251                 /* (2 * 8) config */
5252         default:
5253                 /* Fall back to the (2 * 8) config's RP0 for any other combination */
5254                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
5255                 break;
5256         }
5257
5258         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
5259
5260         return rp0;
5261 }
5262
5263 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5264 {
5265         u32 val, rpe;
5266
5267         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
5268         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
5269
5270         return rpe;
5271 }
5272
5273 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
5274 {
5275         u32 val, rp1;
5276
5277         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5278         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
5279
5280         return rp1;
5281 }
5282
5283 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
5284 {
5285         u32 val, rp1;
5286
5287         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5288
5289         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
5290
5291         return rp1;
5292 }
5293
5294 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
5295 {
5296         u32 val, rp0;
5297
5298         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5299
5300         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
5301         /* Clamp to max */
5302         rp0 = min_t(u32, rp0, 0xea);
5303
5304         return rp0;
5305 }
5306
5307 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5308 {
5309         u32 val, rpe;
5310
5311         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
5312         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
5313         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
5314         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
5315
5316         return rpe;
5317 }
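/*
 * Illustrative: RPe is split across two fuse registers; e.g. a LO
 * field of 0x1f combined with HI bits of 0x2 assembles to
 * (0x2 << 5) | 0x1f = 0x5f.
 */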
5318
5319 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
5320 {
5321         u32 val;
5322
5323         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
5324         /*
5325          * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
5326          * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
5327          * a BYT-M B0 the above register contains 0xbf. Moreover when setting
5328          * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
5329          * to make sure it matches what Punit accepts.
5330          */
5331         return max_t(u32, val, 0xc0);
5332 }
5333
5334 /* Check that the pctx buffer wasn't moved under us. */
5335 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
5336 {
5337         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5338
5339         WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
5340                              dev_priv->vlv_pctx->stolen->start);
5341 }
5342
5343
5344 /* Check that the pcbr address is not empty. */
5345 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
5346 {
5347         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5348
5349         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
5350 }
5351
5352 static void cherryview_setup_pctx(struct drm_device *dev)
5353 {
5354         struct drm_i915_private *dev_priv = to_i915(dev);
5355         struct i915_ggtt *ggtt = &dev_priv->ggtt;
5356         unsigned long pctx_paddr, paddr;
5357         u32 pcbr;
5358         int pctx_size = 32*1024;
5359
5360         pcbr = I915_READ(VLV_PCBR);
5361         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
5362                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5363                 paddr = (dev_priv->mm.stolen_base +
5364                          (ggtt->stolen_size - pctx_size));
5365
5366                 pctx_paddr = (paddr & (~4095));
5367                 I915_WRITE(VLV_PCBR, pctx_paddr);
5368         }
5369
5370         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5371 }
5372
5373 static void valleyview_setup_pctx(struct drm_device *dev)
5374 {
5375         struct drm_i915_private *dev_priv = dev->dev_private;
5376         struct drm_i915_gem_object *pctx;
5377         unsigned long pctx_paddr;
5378         u32 pcbr;
5379         int pctx_size = 24*1024;
5380
5381         mutex_lock(&dev->struct_mutex);
5382
5383         pcbr = I915_READ(VLV_PCBR);
5384         if (pcbr) {
5385                 /* BIOS set it up already, grab the pre-alloc'd space */
5386                 int pcbr_offset;
5387
5388                 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
5389                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
5390                                                                       pcbr_offset,
5391                                                                       I915_GTT_OFFSET_NONE,
5392                                                                       pctx_size);
5393                 goto out;
5394         }
5395
5396         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5397
5398         /*
5399          * From the Gunit register HAS:
5400          * The Gfx driver is expected to program this register and ensure
5401          * proper allocation within Gfx stolen memory.  For example, this
5402          * register should be programmed such that the PCBR range does not
5403          * overlap with other ranges, such as the frame buffer, protected
5404          * memory, or any other relevant ranges.
5405          */
5406         pctx = i915_gem_object_create_stolen(dev, pctx_size);
5407         if (!pctx) {
5408                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
5409                 goto out;
5410         }
5411
5412         pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
5413         I915_WRITE(VLV_PCBR, pctx_paddr);
5414
5415 out:
5416         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5417         dev_priv->vlv_pctx = pctx;
5418         mutex_unlock(&dev->struct_mutex);
5419 }
5420
5421 static void valleyview_cleanup_pctx(struct drm_device *dev)
5422 {
5423         struct drm_i915_private *dev_priv = dev->dev_private;
5424
5425         if (WARN_ON(!dev_priv->vlv_pctx))
5426                 return;
5427
5428         drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base);
5429         dev_priv->vlv_pctx = NULL;
5430 }
5431
5432 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
5433 {
5434         dev_priv->rps.gpll_ref_freq =
5435                 vlv_get_cck_clock(dev_priv, "GPLL ref",
5436                                   CCK_GPLL_CLOCK_CONTROL,
5437                                   dev_priv->czclk_freq);
5438
5439         DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
5440                          dev_priv->rps.gpll_ref_freq);
5441 }
5442
5443 static void valleyview_init_gt_powersave(struct drm_device *dev)
5444 {
5445         struct drm_i915_private *dev_priv = dev->dev_private;
5446         u32 val;
5447
5448         valleyview_setup_pctx(dev);
5449
5450         vlv_init_gpll_ref_freq(dev_priv);
5451
5452         mutex_lock(&dev_priv->rps.hw_lock);
5453
5454         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5455         switch ((val >> 6) & 3) {
5456         case 0:
5457         case 1:
5458                 dev_priv->mem_freq = 800;
5459                 break;
5460         case 2:
5461                 dev_priv->mem_freq = 1066;
5462                 break;
5463         case 3:
5464                 dev_priv->mem_freq = 1333;
5465                 break;
5466         }
5467         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5468
5469         dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
5470         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5471         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5472                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5473                          dev_priv->rps.max_freq);
5474
5475         dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
5476         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5477                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5478                          dev_priv->rps.efficient_freq);
5479
5480         dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
5481         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
5482                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5483                          dev_priv->rps.rp1_freq);
5484
5485         dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
5486         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5487                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5488                          dev_priv->rps.min_freq);
5489
5490         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5491
5492         /* Preserve min/max settings in case of re-init */
5493         if (dev_priv->rps.max_freq_softlimit == 0)
5494                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5495
5496         if (dev_priv->rps.min_freq_softlimit == 0)
5497                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5498
5499         mutex_unlock(&dev_priv->rps.hw_lock);
5500 }
5501
5502 static void cherryview_init_gt_powersave(struct drm_device *dev)
5503 {
5504         struct drm_i915_private *dev_priv = dev->dev_private;
5505         u32 val;
5506
5507         cherryview_setup_pctx(dev);
5508
5509         vlv_init_gpll_ref_freq(dev_priv);
5510
5511         mutex_lock(&dev_priv->rps.hw_lock);
5512
5513         mutex_lock(&dev_priv->sb_lock);
5514         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
5515         mutex_unlock(&dev_priv->sb_lock);
5516
5517         switch ((val >> 2) & 0x7) {
5518         case 3:
5519                 dev_priv->mem_freq = 2000;
5520                 break;
5521         default:
5522                 dev_priv->mem_freq = 1600;
5523                 break;
5524         }
5525         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5526
5527         dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
5528         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5529         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5530                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5531                          dev_priv->rps.max_freq);
5532
5533         dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
5534         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5535                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5536                          dev_priv->rps.efficient_freq);
5537
5538         dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
5539         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
5540                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5541                          dev_priv->rps.rp1_freq);
5542
5543         /* PUnit validated range is only [RPe, RP0] */
5544         dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
5545         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5546                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5547                          dev_priv->rps.min_freq);
5548
5549         WARN_ONCE((dev_priv->rps.max_freq |
5550                    dev_priv->rps.efficient_freq |
5551                    dev_priv->rps.rp1_freq |
5552                    dev_priv->rps.min_freq) & 1,
5553                   "Odd GPU freq values\n");
5554
5555         dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5556
5557         /* Preserve min/max settings in case of re-init */
5558         if (dev_priv->rps.max_freq_softlimit == 0)
5559                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5560
5561         if (dev_priv->rps.min_freq_softlimit == 0)
5562                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5563
5564         mutex_unlock(&dev_priv->rps.hw_lock);
5565 }
5566
5567 static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
5568 {
5569         valleyview_cleanup_pctx(dev);
5570 }
5571
5572 static void cherryview_enable_rps(struct drm_device *dev)
5573 {
5574         struct drm_i915_private *dev_priv = dev->dev_private;
5575         struct intel_engine_cs *engine;
5576         u32 gtfifodbg, val, rc6_mode = 0, pcbr;
5577
5578         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5579
5580         gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
5581                                              GT_FIFO_FREE_ENTRIES_CHV);
5582         if (gtfifodbg) {
5583                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5584                                  gtfifodbg);
5585                 I915_WRITE(GTFIFODBG, gtfifodbg);
5586         }
5587
5588         cherryview_check_pctx(dev_priv);
5589
5590         /* 1a & 1b: Get forcewake during program sequence. Although the driver
5591          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
5592         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5593
5594         /*  Disable RC states. */
5595         I915_WRITE(GEN6_RC_CONTROL, 0);
5596
5597         /* 2a: Program RC6 thresholds. */
5598         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5599         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
5600         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5601
5602         for_each_engine(engine, dev_priv)
5603                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
5604         I915_WRITE(GEN6_RC_SLEEP, 0);
5605
5606         /* TO threshold set to 500 us (0x186 * 1.28 us) */
5607         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
5608
5609         /* allows RC6 residency counter to work */
5610         I915_WRITE(VLV_COUNTER_CONTROL,
5611                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
5612                                       VLV_MEDIA_RC6_COUNT_EN |
5613                                       VLV_RENDER_RC6_COUNT_EN));
5614
5615         /* For now we assume BIOS is allocating and populating the PCBR */
5616         pcbr = I915_READ(VLV_PCBR);
5617
5618         /* 3: Enable RC6 */
5619         if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
5620                                                 (pcbr >> VLV_PCBR_ADDR_SHIFT))
5621                 rc6_mode = GEN7_RC_CTL_TO_MODE;
5622
5623         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5624
5625         /* 4: Program defaults and thresholds for RPS */
5626         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5627         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5628         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5629         I915_WRITE(GEN6_RP_UP_EI, 66000);
5630         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5631
5632         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5633
5634         /* 5: Enable RPS */
5635         I915_WRITE(GEN6_RP_CONTROL,
5636                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5637                    GEN6_RP_MEDIA_IS_GFX |
5638                    GEN6_RP_ENABLE |
5639                    GEN6_RP_UP_BUSY_AVG |
5640                    GEN6_RP_DOWN_IDLE_AVG);
5641
5642         /* Setting Fixed Bias */
5643         val = VLV_OVERRIDE_EN |
5644                   VLV_SOC_TDP_EN |
5645                   CHV_BIAS_CPU_50_SOC_50;
5646         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5647
5648         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5649
5650         /* RPS code assumes GPLL is used */
5651         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5652
5653         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5654         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5655
5656         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5657         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5658                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5659                          dev_priv->rps.cur_freq);
5660
5661         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5662                          intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
5663                          dev_priv->rps.idle_freq);
5664
5665         valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5666
5667         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5668 }
5669
5670 static void valleyview_enable_rps(struct drm_device *dev)
5671 {
5672         struct drm_i915_private *dev_priv = dev->dev_private;
5673         struct intel_engine_cs *engine;
5674         u32 gtfifodbg, val, rc6_mode = 0;
5675
5676         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5677
5678         valleyview_check_pctx(dev_priv);
5679
5680         gtfifodbg = I915_READ(GTFIFODBG);
5681         if (gtfifodbg) {
5682                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5683                                  gtfifodbg);
5684                 I915_WRITE(GTFIFODBG, gtfifodbg);
5685         }
5686
5687         /* If VLV, forcewake all wells, else redirect to the regular path */
5688         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5689
5690         /*  Disable RC states. */
5691         I915_WRITE(GEN6_RC_CONTROL, 0);
5692
5693         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5694         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5695         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5696         I915_WRITE(GEN6_RP_UP_EI, 66000);
5697         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5698
5699         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5700
5701         I915_WRITE(GEN6_RP_CONTROL,
5702                    GEN6_RP_MEDIA_TURBO |
5703                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
5704                    GEN6_RP_MEDIA_IS_GFX |
5705                    GEN6_RP_ENABLE |
5706                    GEN6_RP_UP_BUSY_AVG |
5707                    GEN6_RP_DOWN_IDLE_CONT);
5708
5709         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
5710         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5711         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5712
5713         for_each_engine(engine, dev_priv)
5714                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
5715
5716         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
5717
5718         /* allows RC6 residency counter to work */
5719         I915_WRITE(VLV_COUNTER_CONTROL,
5720                    _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
5721                                       VLV_RENDER_RC0_COUNT_EN |
5722                                       VLV_MEDIA_RC6_COUNT_EN |
5723                                       VLV_RENDER_RC6_COUNT_EN));
5724
5725         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
5726                 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
5727
5728         intel_print_rc6_info(dev, rc6_mode);
5729
5730         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5731
5732         /* Setting Fixed Bias */
5733         val = VLV_OVERRIDE_EN |
5734                   VLV_SOC_TDP_EN |
5735                   VLV_BIAS_CPU_125_SOC_875;
5736         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5737
5738         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5739
5740         /* RPS code assumes GPLL is used */
5741         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5742
5743         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5744         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5745
5746         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5747         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5748                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5749                          dev_priv->rps.cur_freq);
5750
5751         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5752                          intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
5753                          dev_priv->rps.idle_freq);
5754
5755         valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5756
5757         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5758 }
5759
5760 static unsigned long intel_pxfreq(u32 vidfreq)
5761 {
5762         unsigned long freq;
5763         int div = (vidfreq & 0x3f0000) >> 16;
5764         int post = (vidfreq & 0x3000) >> 12;
5765         int pre = (vidfreq & 0x7);
5766
5767         if (!pre)
5768                 return 0;
5769
5770         freq = ((div * 133333) / ((1<<post) * pre));
5771
5772         return freq;
5773 }
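
/*
 * Worked example (hypothetical PXVFREQ value 0x00170001):
 *
 *	div  = (0x00170001 & 0x3f0000) >> 16	= 23
 *	post = (0x00170001 & 0x3000) >> 12	= 0
 *	pre  = 0x00170001 & 0x7			= 1
 *	freq = (23 * 133333) / ((1 << 0) * 1)	= 3066659
 *
 * in the units of the 133333 base-clock constant (presumably kHz, i.e. a
 * 133.333 MHz reference).  A pre of 0 would divide by zero, which is why
 * the function bails out early for it.
 */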
5774
5775 static const struct cparams {
5776         u16 i;
5777         u16 t;
5778         u16 m;
5779         u16 c;
5780 } cparams[] = {
5781         { 1, 1333, 301, 28664 },
5782         { 1, 1066, 294, 24460 },
5783         { 1, 800, 294, 25192 },
5784         { 0, 1333, 276, 27605 },
5785         { 0, 1066, 276, 27605 },
5786         { 0, 800, 231, 23784 },
5787 };
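
/*
 * Sketch of the model (illustrative summary): __i915_chipset_val() below
 * picks the row whose i/t fields match the fused ips.c_m/ips.r_t values and
 * uses its m and c as slope and offset: power = (m * rate + c) / 10, where
 * rate is the summed DMIEC/DDREC/CSIEC counter delta per millisecond.  With
 * the first row (m = 301, c = 28664) and a hypothetical rate of 100
 * counts/ms:
 *
 *	(301 * 100 + 28664) / 10 = 5876
 */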
5788
5789 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5790 {
5791         u64 total_count, diff, ret;
5792         u32 count1, count2, count3, m = 0, c = 0;
5793         unsigned long now = jiffies_to_msecs(jiffies), diff1;
5794         int i;
5795
5796         assert_spin_locked(&mchdev_lock);
5797
5798         diff1 = now - dev_priv->ips.last_time1;
5799
5800         /* Prevent division-by-zero if we are asking too fast.
5801          * Also, we don't get interesting results if we are polling
5802          * faster than once in 10ms, so just return the saved value
5803          * in such cases.
5804          */
5805         if (diff1 <= 10)
5806                 return dev_priv->ips.chipset_power;
5807
5808         count1 = I915_READ(DMIEC);
5809         count2 = I915_READ(DDREC);
5810         count3 = I915_READ(CSIEC);
5811
5812         total_count = count1 + count2 + count3;
5813
5814         /* FIXME: handle per-counter overflow */
5815         if (total_count < dev_priv->ips.last_count1) {
5816                 diff = ~0UL - dev_priv->ips.last_count1;
5817                 diff += total_count;
5818         } else {
5819                 diff = total_count - dev_priv->ips.last_count1;
5820         }
5821
5822         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5823                 if (cparams[i].i == dev_priv->ips.c_m &&
5824                     cparams[i].t == dev_priv->ips.r_t) {
5825                         m = cparams[i].m;
5826                         c = cparams[i].c;
5827                         break;
5828                 }
5829         }
5830
5831         diff = div_u64(diff, diff1);
5832         ret = ((m * diff) + c);
5833         ret = div_u64(ret, 10);
5834
5835         dev_priv->ips.last_count1 = total_count;
5836         dev_priv->ips.last_time1 = now;
5837
5838         dev_priv->ips.chipset_power = ret;
5839
5840         return ret;
5841 }
5842
5843 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5844 {
5845         struct drm_device *dev = dev_priv->dev;
5846         unsigned long val;
5847
5848         if (INTEL_INFO(dev)->gen != 5)
5849                 return 0;
5850
5851         spin_lock_irq(&mchdev_lock);
5852
5853         val = __i915_chipset_val(dev_priv);
5854
5855         spin_unlock_irq(&mchdev_lock);
5856
5857         return val;
5858 }
5859
5860 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5861 {
5862         unsigned long m, x, b;
5863         u32 tsfs;
5864
5865         tsfs = I915_READ(TSFS);
5866
5867         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5868         x = I915_READ8(TR1);
5869
5870         b = tsfs & TSFS_INTR_MASK;
5871
5872         return ((m * x) / 127) - b;
5873 }
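
/*
 * Worked example (hypothetical register values): with a TSFS slope field
 * m = 100, a TR1 reading x = 64 and an intercept field b = 10:
 *
 *	((100 * 64) / 127) - 10 = 50 - 10 = 40
 */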
5874
5875 static int _pxvid_to_vd(u8 pxvid)
5876 {
5877         if (pxvid == 0)
5878                 return 0;
5879
5880         if (pxvid >= 8 && pxvid < 31)
5881                 pxvid = 31;
5882
5883         return (pxvid + 2) * 125;
5884 }
5885
5886 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5887 {
5888         struct drm_device *dev = dev_priv->dev;
5889         const int vd = _pxvid_to_vd(pxvid);
5890         const int vm = vd - 1125;
5891
5892         if (INTEL_INFO(dev)->is_mobile)
5893                 return vm > 0 ? vm : 0;
5894
5895         return vd;
5896 }
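
/*
 * Worked examples (hypothetical fuse values): a pxvid of 5 is outside the
 * 8..30 clamp range, so vd = (5 + 2) * 125 = 875.  A pxvid of 20 is first
 * clamped to 31, giving vd = (31 + 2) * 125 = 4125; on mobile parts
 * pvid_to_extvid() then subtracts 1125, yielding 4125 - 1125 = 3000, and
 * anything that would go negative is reported as 0.
 */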
5897
5898 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5899 {
5900         u64 now, diff, diffms;
5901         u32 count;
5902
5903         assert_spin_locked(&mchdev_lock);
5904
5905         now = ktime_get_raw_ns();
5906         diffms = now - dev_priv->ips.last_time2;
5907         do_div(diffms, NSEC_PER_MSEC);
5908
5909         /* Don't divide by 0 */
5910         if (!diffms)
5911                 return;
5912
5913         count = I915_READ(GFXEC);
5914
5915         if (count < dev_priv->ips.last_count2) {
5916                 diff = ~0UL - dev_priv->ips.last_count2;
5917                 diff += count;
5918         } else {
5919                 diff = count - dev_priv->ips.last_count2;
5920         }
5921
5922         dev_priv->ips.last_count2 = count;
5923         dev_priv->ips.last_time2 = now;
5924
5925         /* More magic constants... */
5926         diff = diff * 1181;
5927         diff = div_u64(diff, diffms * 10);
5928         dev_priv->ips.gfx_power = diff;
5929 }
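
/*
 * Rough numbers (illustrative): if GFXEC advanced by 50000 counts over
 * 100 ms, the stored gfx_power becomes (50000 * 1181) / (100 * 10) = 59050.
 * This running value is the baseline that __i915_gfx_val() below adds its
 * voltage/temperature correction to.
 */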
5930
5931 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5932 {
5933         struct drm_device *dev = dev_priv->dev;
5934
5935         if (INTEL_INFO(dev)->gen != 5)
5936                 return;
5937
5938         spin_lock_irq(&mchdev_lock);
5939
5940         __i915_update_gfx_val(dev_priv);
5941
5942         spin_unlock_irq(&mchdev_lock);
5943 }
5944
5945 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5946 {
5947         unsigned long t, corr, state1, corr2, state2;
5948         u32 pxvid, ext_v;
5949
5950         assert_spin_locked(&mchdev_lock);
5951
5952         pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
5953         pxvid = (pxvid >> 24) & 0x7f;
5954         ext_v = pvid_to_extvid(dev_priv, pxvid);
5955
5956         state1 = ext_v;
5957
5958         t = i915_mch_val(dev_priv);
5959
5960         /* Revel in the empirically derived constants */
5961
5962         /* Correction factor in 1/100000 units */
5963         if (t > 80)
5964                 corr = ((t * 2349) + 135940);
5965         else if (t >= 50)
5966                 corr = ((t * 964) + 29317);
5967         else /* < 50 */
5968                 corr = ((t * 301) + 1004);
5969
5970         corr = corr * ((150142 * state1) / 10000 - 78642);
5971         corr /= 100000;
5972         corr2 = (corr * dev_priv->ips.corr);
5973
5974         state2 = (corr2 * state1) / 10000;
5975         state2 /= 100; /* convert to mW */
5976
5977         __i915_update_gfx_val(dev_priv);
5978
5979         return dev_priv->ips.gfx_power + state2;
5980 }
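
/*
 * Worked example (all inputs hypothetical): for t = 60 (the 50..80 band),
 * state1 = 6000 and a fused ips.corr of 100:
 *
 *	corr   = 60 * 964 + 29317			= 87157
 *	corr  *= (150142 * 6000) / 10000 - 78642	= 87157 * 11443
 *	corr  /= 100000					= 9973
 *	corr2  = 9973 * 100				= 997300
 *	state2 = (997300 * 6000) / 10000 / 100		= 5983	(mW)
 */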
5981
5982 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5983 {
5984         struct drm_device *dev = dev_priv->dev;
5985         unsigned long val;
5986
5987         if (INTEL_INFO(dev)->gen != 5)
5988                 return 0;
5989
5990         spin_lock_irq(&mchdev_lock);
5991
5992         val = __i915_gfx_val(dev_priv);
5993
5994         spin_unlock_irq(&mchdev_lock);
5995
5996         return val;
5997 }
5998
5999 /**
6000  * i915_read_mch_val - return value for IPS use
6001  *
6002  * Calculate and return a value for the IPS driver to use when deciding whether
6003  * we have thermal and power headroom to increase CPU or GPU power budget.
6004  */
6005 unsigned long i915_read_mch_val(void)
6006 {
6007         struct drm_i915_private *dev_priv;
6008         unsigned long chipset_val, graphics_val, ret = 0;
6009
6010         spin_lock_irq(&mchdev_lock);
6011         if (!i915_mch_dev)
6012                 goto out_unlock;
6013         dev_priv = i915_mch_dev;
6014
6015         chipset_val = __i915_chipset_val(dev_priv);
6016         graphics_val = __i915_gfx_val(dev_priv);
6017
6018         ret = chipset_val + graphics_val;
6019
6020 out_unlock:
6021         spin_unlock_irq(&mchdev_lock);
6022
6023         return ret;
6024 }
6025 EXPORT_SYMBOL_GPL(i915_read_mch_val);
6026
6027 /**
6028  * i915_gpu_raise - raise GPU frequency limit
6029  *
6030  * Raise the limit; IPS indicates we have thermal headroom.
6031  */
6032 bool i915_gpu_raise(void)
6033 {
6034         struct drm_i915_private *dev_priv;
6035         bool ret = true;
6036
6037         spin_lock_irq(&mchdev_lock);
6038         if (!i915_mch_dev) {
6039                 ret = false;
6040                 goto out_unlock;
6041         }
6042         dev_priv = i915_mch_dev;
6043
6044         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
6045                 dev_priv->ips.max_delay--;
6046
6047 out_unlock:
6048         spin_unlock_irq(&mchdev_lock);
6049
6050         return ret;
6051 }
6052 EXPORT_SYMBOL_GPL(i915_gpu_raise);
6053
6054 /**
6055  * i915_gpu_lower - lower GPU frequency limit
6056  *
6057  * IPS indicates we're close to a thermal limit, so throttle back the GPU
6058  * frequency maximum.
6059  */
6060 bool i915_gpu_lower(void)
6061 {
6062         struct drm_i915_private *dev_priv;
6063         bool ret = true;
6064
6065         spin_lock_irq(&mchdev_lock);
6066         if (!i915_mch_dev) {
6067                 ret = false;
6068                 goto out_unlock;
6069         }
6070         dev_priv = i915_mch_dev;
6071
6072         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
6073                 dev_priv->ips.max_delay++;
6074
6075 out_unlock:
6076         spin_unlock_irq(&mchdev_lock);
6077
6078         return ret;
6079 }
6080 EXPORT_SYMBOL_GPL(i915_gpu_lower);
6081
6082 /**
6083  * i915_gpu_busy - indicate GPU busyness to IPS
6084  *
6085  * Tell the IPS driver whether or not the GPU is busy.
6086  */
6087 bool i915_gpu_busy(void)
6088 {
6089         struct drm_i915_private *dev_priv;
6090         struct intel_engine_cs *engine;
6091         bool ret = false;
6092
6093         spin_lock_irq(&mchdev_lock);
6094         if (!i915_mch_dev)
6095                 goto out_unlock;
6096         dev_priv = i915_mch_dev;
6097
6098         for_each_engine(engine, dev_priv)
6099                 ret |= !list_empty(&engine->request_list);
6100
6101 out_unlock:
6102         spin_unlock_irq(&mchdev_lock);
6103
6104         return ret;
6105 }
6106 EXPORT_SYMBOL_GPL(i915_gpu_busy);
6107
6108 /**
6109  * i915_gpu_turbo_disable - disable graphics turbo
6110  *
6111  * Disable graphics turbo by resetting the max frequency and setting the
6112  * current frequency to the default.
6113  */
6114 bool i915_gpu_turbo_disable(void)
6115 {
6116         struct drm_i915_private *dev_priv;
6117         bool ret = true;
6118
6119         spin_lock_irq(&mchdev_lock);
6120         if (!i915_mch_dev) {
6121                 ret = false;
6122                 goto out_unlock;
6123         }
6124         dev_priv = i915_mch_dev;
6125
6126         dev_priv->ips.max_delay = dev_priv->ips.fstart;
6127
6128         if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
6129                 ret = false;
6130
6131 out_unlock:
6132         spin_unlock_irq(&mchdev_lock);
6133
6134         return ret;
6135 }
6136 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
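
/*
 * Consumer-side sketch (hedged; see drivers/platform/x86/intel_ips.c for
 * the real code): intel_ips resolves these exports at runtime with
 * symbol_get(), mirroring ips_ping_for_i915_load() below, so neither module
 * has a hard link-time dependency on the other:
 *
 *	unsigned long (*read_mch)(void);
 *
 *	read_mch = symbol_get(i915_read_mch_val);
 *	if (read_mch) {
 *		unsigned long headroom = read_mch();
 *		symbol_put(i915_read_mch_val);
 *	}
 */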
6137
6138 /*
6139  * Tells the intel_ips driver that the i915 driver is now loaded, if
6140  * IPS got loaded first.
6141  *
6142  * This awkward dance is so that neither module has to depend on the
6143  * other in order for IPS to do the appropriate communication of
6144  * GPU turbo limits to i915.
6145  */
6146 static void
6147 ips_ping_for_i915_load(void)
6148 {
6149         void (*link)(void);
6150
6151         link = symbol_get(ips_link_to_i915_driver);
6152         if (link) {
6153                 link();
6154                 symbol_put(ips_link_to_i915_driver);
6155         }
6156 }
6157
6158 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
6159 {
6160         /* We only register the i915 ips part with intel-ips once everything is
6161          * set up, to avoid intel-ips sneaking in and reading bogus values. */
6162         spin_lock_irq(&mchdev_lock);
6163         i915_mch_dev = dev_priv;
6164         spin_unlock_irq(&mchdev_lock);
6165
6166         ips_ping_for_i915_load();
6167 }
6168
6169 void intel_gpu_ips_teardown(void)
6170 {
6171         spin_lock_irq(&mchdev_lock);
6172         i915_mch_dev = NULL;
6173         spin_unlock_irq(&mchdev_lock);
6174 }
6175
6176 static void intel_init_emon(struct drm_device *dev)
6177 {
6178         struct drm_i915_private *dev_priv = dev->dev_private;
6179         u32 lcfuse;
6180         u8 pxw[16];
6181         int i;
6182
6183         /* Disable to program */
6184         I915_WRITE(ECR, 0);
6185         POSTING_READ(ECR);
6186
6187         /* Program energy weights for various events */
6188         I915_WRITE(SDEW, 0x15040d00);
6189         I915_WRITE(CSIEW0, 0x007f0000);
6190         I915_WRITE(CSIEW1, 0x1e220004);
6191         I915_WRITE(CSIEW2, 0x04000004);
6192
6193         for (i = 0; i < 5; i++)
6194                 I915_WRITE(PEW(i), 0);
6195         for (i = 0; i < 3; i++)
6196                 I915_WRITE(DEW(i), 0);
6197
6198         /* Program P-state weights to account for frequency power adjustment */
6199         for (i = 0; i < 16; i++) {
6200                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
6201                 unsigned long freq = intel_pxfreq(pxvidfreq);
6202                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
6203                         PXVFREQ_PX_SHIFT;
6204                 unsigned long val;
6205
6206                 val = vid * vid;
6207                 val *= (freq / 1000);
6208                 val *= 255;
6209                 val /= (127*127*900);
6210                 if (val > 0xff)
6211                         DRM_ERROR("bad pxval: %lu\n", val);
6212                 pxw[i] = val;
6213         }
6214         /* Render standby states get 0 weight */
6215         pxw[14] = 0;
6216         pxw[15] = 0;
6217
6218         for (i = 0; i < 4; i++) {
6219                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
6220                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
6221                 I915_WRITE(PXW(i), val);
6222         }
6223
6224         /* Adjust magic regs to magic values (more experimental results) */
6225         I915_WRITE(OGW0, 0);
6226         I915_WRITE(OGW1, 0);
6227         I915_WRITE(EG0, 0x00007f00);
6228         I915_WRITE(EG1, 0x0000000e);
6229         I915_WRITE(EG2, 0x000e0000);
6230         I915_WRITE(EG3, 0x68000300);
6231         I915_WRITE(EG4, 0x42000000);
6232         I915_WRITE(EG5, 0x00140031);
6233         I915_WRITE(EG6, 0);
6234         I915_WRITE(EG7, 0);
6235
6236         for (i = 0; i < 8; i++)
6237                 I915_WRITE(PXWL(i), 0);
6238
6239         /* Enable PMON + select events */
6240         I915_WRITE(ECR, 0x80000019);
6241
6242         lcfuse = I915_READ(LCFUSE02);
6243
6244         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
6245 }
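
/*
 * Worked example for the P-state weight formula above (hypothetical fuse
 * giving vid = 40 and freq = 1066664, i.e. div = 8 in intel_pxfreq()):
 *
 *	val  = 40 * 40			= 1600
 *	val *= (1066664 / 1000)		= 1705600
 *	val *= 255			= 434928000
 *	val /= (127 * 127 * 900)	= 29
 *
 * so pxw[i] = 29; anything over 0xff would trip the "bad pxval" warning.
 */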
6246
6247 void intel_init_gt_powersave(struct drm_device *dev)
6248 {
6249         struct drm_i915_private *dev_priv = dev->dev_private;
6250
6251         /*
6252          * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
6253          * requirement.
6254          */
6255         if (!i915.enable_rc6) {
6256                 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
6257                 intel_runtime_pm_get(dev_priv);
6258         }
6259
6260         if (IS_CHERRYVIEW(dev))
6261                 cherryview_init_gt_powersave(dev);
6262         else if (IS_VALLEYVIEW(dev))
6263                 valleyview_init_gt_powersave(dev);
6264 }
6265
6266 void intel_cleanup_gt_powersave(struct drm_device *dev)
6267 {
6268         struct drm_i915_private *dev_priv = dev->dev_private;
6269
6270         if (IS_CHERRYVIEW(dev))
6271                 return;
6272         else if (IS_VALLEYVIEW(dev))
6273                 valleyview_cleanup_gt_powersave(dev);
6274
6275         if (!i915.enable_rc6)
6276                 intel_runtime_pm_put(dev_priv);
6277 }
6278
6279 static void gen6_suspend_rps(struct drm_device *dev)
6280 {
6281         struct drm_i915_private *dev_priv = dev->dev_private;
6282
6283         flush_delayed_work(&dev_priv->rps.delayed_resume_work);
6284
6285         gen6_disable_rps_interrupts(dev);
6286 }
6287
6288 /**
6289  * intel_suspend_gt_powersave - suspend PM work and helper threads
6290  * @dev: drm device
6291  *
6292  * We don't want to disable RC6 or other features here, we just want
6293  * to make sure any work we've queued has finished and won't bother
6294  * us while we're suspended.
6295  */
6296 void intel_suspend_gt_powersave(struct drm_device *dev)
6297 {
6298         struct drm_i915_private *dev_priv = dev->dev_private;
6299
6300         if (INTEL_INFO(dev)->gen < 6)
6301                 return;
6302
6303         gen6_suspend_rps(dev);
6304
6305         /* Force GPU to min freq during suspend */
6306         gen6_rps_idle(dev_priv);
6307 }
6308
6309 void intel_disable_gt_powersave(struct drm_device *dev)
6310 {
6311         struct drm_i915_private *dev_priv = dev->dev_private;
6312
6313         if (IS_IRONLAKE_M(dev)) {
6314                 ironlake_disable_drps(dev);
6315         } else if (INTEL_INFO(dev)->gen >= 6) {
6316                 intel_suspend_gt_powersave(dev);
6317
6318                 mutex_lock(&dev_priv->rps.hw_lock);
6319                 if (INTEL_INFO(dev)->gen >= 9) {
6320                         gen9_disable_rc6(dev);
6321                         gen9_disable_rps(dev);
6322                 } else if (IS_CHERRYVIEW(dev))
6323                         cherryview_disable_rps(dev);
6324                 else if (IS_VALLEYVIEW(dev))
6325                         valleyview_disable_rps(dev);
6326                 else
6327                         gen6_disable_rps(dev);
6328
6329                 dev_priv->rps.enabled = false;
6330                 mutex_unlock(&dev_priv->rps.hw_lock);
6331         }
6332 }
6333
6334 static void intel_gen6_powersave_work(struct work_struct *work)
6335 {
6336         struct drm_i915_private *dev_priv =
6337                 container_of(work, struct drm_i915_private,
6338                              rps.delayed_resume_work.work);
6339         struct drm_device *dev = dev_priv->dev;
6340
6341         mutex_lock(&dev_priv->rps.hw_lock);
6342
6343         gen6_reset_rps_interrupts(dev);
6344
6345         if (IS_CHERRYVIEW(dev)) {
6346                 cherryview_enable_rps(dev);
6347         } else if (IS_VALLEYVIEW(dev)) {
6348                 valleyview_enable_rps(dev);
6349         } else if (INTEL_INFO(dev)->gen >= 9) {
6350                 gen9_enable_rc6(dev);
6351                 gen9_enable_rps(dev);
6352                 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
6353                         __gen6_update_ring_freq(dev);
6354         } else if (IS_BROADWELL(dev)) {
6355                 gen8_enable_rps(dev);
6356                 __gen6_update_ring_freq(dev);
6357         } else {
6358                 gen6_enable_rps(dev);
6359                 __gen6_update_ring_freq(dev);
6360         }
6361
6362         WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
6363         WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
6364
6365         WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
6366         WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
6367
6368         dev_priv->rps.enabled = true;
6369
6370         gen6_enable_rps_interrupts(dev);
6371
6372         mutex_unlock(&dev_priv->rps.hw_lock);
6373
6374         intel_runtime_pm_put(dev_priv);
6375 }
6376
6377 void intel_enable_gt_powersave(struct drm_device *dev)
6378 {
6379         struct drm_i915_private *dev_priv = dev->dev_private;
6380
6381         /* Powersaving is controlled by the host when inside a VM */
6382         if (intel_vgpu_active(dev))
6383                 return;
6384
6385         if (IS_IRONLAKE_M(dev)) {
6386                 ironlake_enable_drps(dev);
6387                 mutex_lock(&dev->struct_mutex);
6388                 intel_init_emon(dev);
6389                 mutex_unlock(&dev->struct_mutex);
6390         } else if (INTEL_INFO(dev)->gen >= 6) {
6391                 /*
6392                  * PCU communication is slow and this doesn't need to be
6393                  * done at any specific time, so do this out of our fast path
6394                  * to make resume and init faster.
6395                  *
6396                  * We depend on the HW RC6 power context save/restore
6397                  * mechanism when entering D3 through runtime PM suspend. So
6398                  * disable RPM until RPS/RC6 is properly setup. We can only
6399                  * get here via the driver load/system resume/runtime resume
6400                  * paths, so the _noresume version is enough (and in case of
6401                  * runtime resume it's necessary).
6402                  */
6403                 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
6404                                            round_jiffies_up_relative(HZ)))
6405                         intel_runtime_pm_get_noresume(dev_priv);
6406         }
6407 }
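
/*
 * Note: the reference taken with intel_runtime_pm_get_noresume() above is
 * dropped by the intel_runtime_pm_put() at the end of
 * intel_gen6_powersave_work(), once RPS/RC6 setup has actually completed.
 */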
6408
6409 void intel_reset_gt_powersave(struct drm_device *dev)
6410 {
6411         struct drm_i915_private *dev_priv = dev->dev_private;
6412
6413         if (INTEL_INFO(dev)->gen < 6)
6414                 return;
6415
6416         gen6_suspend_rps(dev);
6417         dev_priv->rps.enabled = false;
6418 }
6419
6420 static void ibx_init_clock_gating(struct drm_device *dev)
6421 {
6422         struct drm_i915_private *dev_priv = dev->dev_private;
6423
6424         /*
6425          * On Ibex Peak and Cougar Point, we need to disable clock
6426          * gating for the panel power sequencer or it will fail to
6427          * start up when no ports are active.
6428          */
6429         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
6430 }
6431
6432 static void g4x_disable_trickle_feed(struct drm_device *dev)
6433 {
6434         struct drm_i915_private *dev_priv = dev->dev_private;
6435         enum pipe pipe;
6436
6437         for_each_pipe(dev_priv, pipe) {
6438                 I915_WRITE(DSPCNTR(pipe),
6439                            I915_READ(DSPCNTR(pipe)) |
6440                            DISPPLANE_TRICKLE_FEED_DISABLE);
6441
6442                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
6443                 POSTING_READ(DSPSURF(pipe));
6444         }
6445 }
6446
6447 static void ilk_init_lp_watermarks(struct drm_device *dev)
6448 {
6449         struct drm_i915_private *dev_priv = dev->dev_private;
6450
6451         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6452         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6453         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6454
6455         /*
6456          * Don't touch WM1S_LP_EN here.
6457          * Doing so could cause underruns.
6458          */
6459 }
6460
6461 static void ironlake_init_clock_gating(struct drm_device *dev)
6462 {
6463         struct drm_i915_private *dev_priv = dev->dev_private;
6464         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6465
6466         /*
6467          * Required for FBC
6468          * WaFbcDisableDpfcClockGating:ilk
6469          */
6470         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
6471                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
6472                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6473
6474         I915_WRITE(PCH_3DCGDIS0,
6475                    MARIUNIT_CLOCK_GATE_DISABLE |
6476                    SVSMUNIT_CLOCK_GATE_DISABLE);
6477         I915_WRITE(PCH_3DCGDIS1,
6478                    VFMUNIT_CLOCK_GATE_DISABLE);
6479
6480         /*
6481          * According to the spec the following bits should be set in
6482          * order to enable memory self-refresh:
6483          * The bit 22/21 of 0x42004
6484          * The bit 5 of 0x42020
6485          * The bit 15 of 0x45000
6486          */
6487         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6488                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
6489                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
6490         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6491         I915_WRITE(DISP_ARB_CTL,
6492                    (I915_READ(DISP_ARB_CTL) |
6493                     DISP_FBC_WM_DIS));
6494
6495         ilk_init_lp_watermarks(dev);
6496
6497         /*
6498          * Based on documentation from the hardware team, the following bits
6499          * should be set unconditionally in order to enable FBC.
6500          * The bit 22 of 0x42000
6501          * The bit 22 of 0x42004
6502          * The bit 7,8,9 of 0x42020.
6503          */
6504         if (IS_IRONLAKE_M(dev)) {
6505                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6506                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6507                            I915_READ(ILK_DISPLAY_CHICKEN1) |
6508                            ILK_FBCQ_DIS);
6509                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6510                            I915_READ(ILK_DISPLAY_CHICKEN2) |
6511                            ILK_DPARB_GATE);
6512         }
6513
6514         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6515
6516         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6517                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6518                    ILK_ELPIN_409_SELECT);
6519         I915_WRITE(_3D_CHICKEN2,
6520                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
6521                    _3D_CHICKEN2_WM_READ_PIPELINED);
6522
6523         /* WaDisableRenderCachePipelinedFlush:ilk */
6524         I915_WRITE(CACHE_MODE_0,
6525                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6526
6527         /* WaDisable_RenderCache_OperationalFlush:ilk */
6528         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6529
6530         g4x_disable_trickle_feed(dev);
6531
6532         ibx_init_clock_gating(dev);
6533 }
6534
6535 static void cpt_init_clock_gating(struct drm_device *dev)
6536 {
6537         struct drm_i915_private *dev_priv = dev->dev_private;
6538         int pipe;
6539         uint32_t val;
6540
6541         /*
6542          * On Ibex Peak and Cougar Point, we need to disable clock
6543          * gating for the panel power sequencer or it will fail to
6544          * start up when no ports are active.
6545          */
6546         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
6547                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
6548                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
6549         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
6550                    DPLS_EDP_PPS_FIX_DIS);
6551         /* This fixes weird display corruption (a few pixels shifted
6552          * downward) seen only on the LVDS panels of some HP Ivy Bridge laptops.
6553          */
6554         for_each_pipe(dev_priv, pipe) {
6555                 val = I915_READ(TRANS_CHICKEN2(pipe));
6556                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
6557                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6558                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
6559                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6560                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
6561                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
6562                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
6563                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
6564         }
6565         /* WADP0ClockGatingDisable */
6566         for_each_pipe(dev_priv, pipe) {
6567                 I915_WRITE(TRANS_CHICKEN1(pipe),
6568                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6569         }
6570 }
6571
6572 static void gen6_check_mch_setup(struct drm_device *dev)
6573 {
6574         struct drm_i915_private *dev_priv = dev->dev_private;
6575         uint32_t tmp;
6576
6577         tmp = I915_READ(MCH_SSKPD);
6578         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
6579                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
6580                               tmp);
6581 }
6582
6583 static void gen6_init_clock_gating(struct drm_device *dev)
6584 {
6585         struct drm_i915_private *dev_priv = dev->dev_private;
6586         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6587
6588         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6589
6590         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6591                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6592                    ILK_ELPIN_409_SELECT);
6593
6594         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
6595         I915_WRITE(_3D_CHICKEN,
6596                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
6597
6598         /* WaDisable_RenderCache_OperationalFlush:snb */
6599         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6600
6601         /*
6602          * BSpec recommends 8x4 when MSAA is used,
6603          * however in practice 16x4 seems fastest.
6604          *
6605          * Note that PS/WM thread counts depend on the WIZ hashing
6606          * disable bit, which we don't touch here, but it's good
6607          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6608          */
6609         I915_WRITE(GEN6_GT_MODE,
6610                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6611
6612         ilk_init_lp_watermarks(dev);
6613
6614         I915_WRITE(CACHE_MODE_0,
6615                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6616
6617         I915_WRITE(GEN6_UCGCTL1,
6618                    I915_READ(GEN6_UCGCTL1) |
6619                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
6620                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6621
6622         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
6623          * gating disable must be set.  Failure to set it results in
6624          * flickering pixels due to Z write ordering failures after
6625          * some amount of runtime in the Mesa "fire" demo, and Unigine
6626          * Sanctuary and Tropics, and apparently anything else with
6627          * alpha test or pixel discard.
6628          *
6629          * According to the spec, bit 11 (RCCUNIT) must also be set,
6630          * but we have not debugged actual testcases to verify it.
6631          *
6632          * WaDisableRCCUnitClockGating:snb
6633          * WaDisableRCPBUnitClockGating:snb
6634          */
6635         I915_WRITE(GEN6_UCGCTL2,
6636                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
6637                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
6638
6639         /* WaStripsFansDisableFastClipPerformanceFix:snb */
6640         I915_WRITE(_3D_CHICKEN3,
6641                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6642
6643         /*
6644          * Bspec says:
6645          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
6646          * 3DSTATE_SF number of SF output attributes is more than 16."
6647          */
6648         I915_WRITE(_3D_CHICKEN3,
6649                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
6650
6651         /*
6652          * According to the spec the following bits should be
6653          * set in order to enable memory self-refresh and fbc:
6654          * The bit21 and bit22 of 0x42000
6655          * The bit21 and bit22 of 0x42004
6656          * The bit5 and bit7 of 0x42020
6657          * The bit14 of 0x70180
6658          * The bit14 of 0x71180
6659          *
6660          * WaFbcAsynchFlipDisableFbcQueue:snb
6661          */
6662         I915_WRITE(ILK_DISPLAY_CHICKEN1,
6663                    I915_READ(ILK_DISPLAY_CHICKEN1) |
6664                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
6665         I915_WRITE(ILK_DISPLAY_CHICKEN2,
6666                    I915_READ(ILK_DISPLAY_CHICKEN2) |
6667                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
6668         I915_WRITE(ILK_DSPCLK_GATE_D,
6669                    I915_READ(ILK_DSPCLK_GATE_D) |
6670                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
6671                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6672
6673         g4x_disable_trickle_feed(dev);
6674
6675         cpt_init_clock_gating(dev);
6676
6677         gen6_check_mch_setup(dev);
6678 }
6679
6680 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
6681 {
6682         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
6683
6684         /*
6685          * WaVSThreadDispatchOverride:ivb,vlv
6686          *
6687          * This actually overrides the dispatch
6688          * mode for all thread types.
6689          */
6690         reg &= ~GEN7_FF_SCHED_MASK;
6691         reg |= GEN7_FF_TS_SCHED_HW;
6692         reg |= GEN7_FF_VS_SCHED_HW;
6693         reg |= GEN7_FF_DS_SCHED_HW;
6694
6695         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
6696 }
6697
6698 static void lpt_init_clock_gating(struct drm_device *dev)
6699 {
6700         struct drm_i915_private *dev_priv = dev->dev_private;
6701
6702         /*
6703          * TODO: this bit should only be enabled when really needed, then
6704          * disabled when not needed anymore in order to save power.
6705          */
6706         if (HAS_PCH_LPT_LP(dev))
6707                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
6708                            I915_READ(SOUTH_DSPCLK_GATE_D) |
6709                            PCH_LP_PARTITION_LEVEL_DISABLE);
6710
6711         /* WADPOClockGatingDisable:hsw */
6712         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
6713                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
6714                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6715 }
6716
6717 static void lpt_suspend_hw(struct drm_device *dev)
6718 {
6719         struct drm_i915_private *dev_priv = dev->dev_private;
6720
6721         if (HAS_PCH_LPT_LP(dev)) {
6722                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
6723
6724                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
6725                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
6726         }
6727 }
6728
6729 static void kabylake_init_clock_gating(struct drm_device *dev)
6730 {
6731         struct drm_i915_private *dev_priv = dev->dev_private;
6732
6733         gen9_init_clock_gating(dev);
6734
6735         /* WaDisableSDEUnitClockGating:kbl */
6736         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
6737                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6738                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6739
6740         /* WaDisableGamClockGating:kbl */
6741         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
6742                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6743                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
6744
6745         /* WaFbcNukeOnHostModify:kbl */
6746         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
6747                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
6748 }
6749
6750 static void skylake_init_clock_gating(struct drm_device *dev)
6751 {
6752         struct drm_i915_private *dev_priv = dev->dev_private;
6753
6754         gen9_init_clock_gating(dev);
6755
6756         /* WaFbcNukeOnHostModify:skl */
6757         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
6758                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
6759 }
6760
6761 static void broadwell_init_clock_gating(struct drm_device *dev)
6762 {
6763         struct drm_i915_private *dev_priv = dev->dev_private;
6764         enum pipe pipe;
6765         uint32_t misccpctl;
6766
6767         ilk_init_lp_watermarks(dev);
6768
6769         /* WaSwitchSolVfFArbitrationPriority:bdw */
6770         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6771
6772         /* WaPsrDPAMaskVBlankInSRD:bdw */
6773         I915_WRITE(CHICKEN_PAR1_1,
6774                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6775
6776         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6777         for_each_pipe(dev_priv, pipe) {
6778                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
6779                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
6780                            BDW_DPRS_MASK_VBLANK_SRD);
6781         }
6782
6783         /* WaVSRefCountFullforceMissDisable:bdw */
6784         /* WaDSRefCountFullforceMissDisable:bdw */
6785         I915_WRITE(GEN7_FF_THREAD_MODE,
6786                    I915_READ(GEN7_FF_THREAD_MODE) &
6787                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6788
6789         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6790                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6791
6792         /* WaDisableSDEUnitClockGating:bdw */
6793         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6794                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6795
6796         /*
6797          * WaProgramL3SqcReg1Default:bdw
6798          * WaTempDisableDOPClkGating:bdw
6799          */
6800         misccpctl = I915_READ(GEN7_MISCCPCTL);
6801         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
6802         I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
6803         /*
6804          * Wait at least 100 clocks before re-enabling clock gating. See
6805          * the definition of L3SQCREG1 in BSpec.
6806          */
6807         POSTING_READ(GEN8_L3SQCREG1);
6808         udelay(1);
6809         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
6810
6811         /*
6812          * WaGttCachingOffByDefault:bdw
6813          * GTT cache may not work with big pages, so if those
6814          * are ever enabled GTT cache may need to be disabled.
6815          */
6816         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6817
6818         lpt_init_clock_gating(dev);
6819 }
6820
6821 static void haswell_init_clock_gating(struct drm_device *dev)
6822 {
6823         struct drm_i915_private *dev_priv = dev->dev_private;
6824
6825         ilk_init_lp_watermarks(dev);
6826
6827         /* L3 caching of data atomics doesn't work -- disable it. */
6828         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6829         I915_WRITE(HSW_ROW_CHICKEN3,
6830                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6831
6832         /* This is required by WaCatErrorRejectionIssue:hsw */
6833         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6834                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6835                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6836
6837         /* WaVSRefCountFullforceMissDisable:hsw */
6838         I915_WRITE(GEN7_FF_THREAD_MODE,
6839                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6840
6841         /* WaDisable_RenderCache_OperationalFlush:hsw */
6842         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6843
6844         /* enable HiZ Raw Stall Optimization */
6845         I915_WRITE(CACHE_MODE_0_GEN7,
6846                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6847
6848         /* WaDisable4x2SubspanOptimization:hsw */
6849         I915_WRITE(CACHE_MODE_1,
6850                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6851
6852         /*
6853          * BSpec recommends 8x4 when MSAA is used,
6854          * however in practice 16x4 seems fastest.
6855          *
6856          * Note that PS/WM thread counts depend on the WIZ hashing
6857          * disable bit, which we don't touch here, but it's good
6858          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6859          */
6860         I915_WRITE(GEN7_GT_MODE,
6861                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6862
6863         /* WaSampleCChickenBitEnable:hsw */
6864         I915_WRITE(HALF_SLICE_CHICKEN3,
6865                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6866
6867         /* WaSwitchSolVfFArbitrationPriority:hsw */
6868         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6869
6870         /* WaRsPkgCStateDisplayPMReq:hsw */
6871         I915_WRITE(CHICKEN_PAR1_1,
6872                    I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6873
6874         lpt_init_clock_gating(dev);
6875 }
6876
6877 static void ivybridge_init_clock_gating(struct drm_device *dev)
6878 {
6879         struct drm_i915_private *dev_priv = dev->dev_private;
6880         uint32_t snpcr;
6881
6882         ilk_init_lp_watermarks(dev);
6883
6884         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6885
6886         /* WaDisableEarlyCull:ivb */
6887         I915_WRITE(_3D_CHICKEN3,
6888                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6889
6890         /* WaDisableBackToBackFlipFix:ivb */
6891         I915_WRITE(IVB_CHICKEN3,
6892                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6893                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6894
6895         /* WaDisablePSDDualDispatchEnable:ivb */
6896         if (IS_IVB_GT1(dev))
6897                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6898                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6899
6900         /* WaDisable_RenderCache_OperationalFlush:ivb */
6901         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6902
6903         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6904         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6905                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6906
6907         /* WaApplyL3ControlAndL3ChickenMode:ivb */
6908         I915_WRITE(GEN7_L3CNTLREG1,
6909                         GEN7_WA_FOR_GEN7_L3_CONTROL);
6910         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6911                    GEN7_WA_L3_CHICKEN_MODE);
6912         if (IS_IVB_GT1(dev)) {
6913                 I915_WRITE(GEN7_ROW_CHICKEN2,
6914                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6915         } else {
6916                 /* must write both registers */
6917                 I915_WRITE(GEN7_ROW_CHICKEN2,
6918                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6919                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6920                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6921         }
6922
6923         /* WaForceL3Serialization:ivb */
6924         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6925                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6926
6927         /*
6928          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6929          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6930          */
6931         I915_WRITE(GEN6_UCGCTL2,
6932                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6933
6934         /* This is required by WaCatErrorRejectionIssue:ivb */
6935         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6936                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6937                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6938
6939         g4x_disable_trickle_feed(dev);
6940
6941         gen7_setup_fixed_func_scheduler(dev_priv);
6942
6943         if (0) { /* causes HiZ corruption on ivb:gt1 */
6944                 /* enable HiZ Raw Stall Optimization */
6945                 I915_WRITE(CACHE_MODE_0_GEN7,
6946                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6947         }
6948
6949         /* WaDisable4x2SubspanOptimization:ivb */
6950         I915_WRITE(CACHE_MODE_1,
6951                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6952
6953         /*
6954          * BSpec recommends 8x4 when MSAA is used,
6955          * however in practice 16x4 seems fastest.
6956          *
6957          * Note that PS/WM thread counts depend on the WIZ hashing
6958          * disable bit, which we don't touch here, but it's good
6959          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6960          */
6961         I915_WRITE(GEN7_GT_MODE,
6962                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6963
6964         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6965         snpcr &= ~GEN6_MBC_SNPCR_MASK;
6966         snpcr |= GEN6_MBC_SNPCR_MED;
6967         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6968
6969         if (!HAS_PCH_NOP(dev))
6970                 cpt_init_clock_gating(dev);
6971
6972         gen6_check_mch_setup(dev);
6973 }
6974
6975 static void valleyview_init_clock_gating(struct drm_device *dev)
6976 {
6977         struct drm_i915_private *dev_priv = dev->dev_private;
6978
6979         /* WaDisableEarlyCull:vlv */
6980         I915_WRITE(_3D_CHICKEN3,
6981                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6982
6983         /* WaDisableBackToBackFlipFix:vlv */
6984         I915_WRITE(IVB_CHICKEN3,
6985                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6986                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6987
6988         /* WaPsdDispatchEnable:vlv */
6989         /* WaDisablePSDDualDispatchEnable:vlv */
6990         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6991                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6992                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6993
6994         /* WaDisable_RenderCache_OperationalFlush:vlv */
6995         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6996
6997         /* WaForceL3Serialization:vlv */
6998         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6999                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
7000
7001         /* WaDisableDopClockGating:vlv */
7002         I915_WRITE(GEN7_ROW_CHICKEN2,
7003                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
7004
7005         /* This is required by WaCatErrorRejectionIssue:vlv */
7006         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
7007                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7008                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
7009
7010         gen7_setup_fixed_func_scheduler(dev_priv);
7011
7012         /*
7013          * According to the spec, bit 13 (RCZUNIT) must be set (as on IVB).
7014          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
7015          */
7016         I915_WRITE(GEN6_UCGCTL2,
7017                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
7018
7019         /* WaDisableL3Bank2xClockGate:vlv
7020          * Disable L3 clock gating - MMIO 940c[25] = 1
7021          * Set bit 25 to disable L3_BANK_2x_CLK_GATING. */
7022         I915_WRITE(GEN7_UCGCTL4,
7023                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
7024
7025         /*
7026          * BSpec says this must be set, even though
7027          * WaDisable4x2SubspanOptimization isn't listed for VLV.
7028          */
7029         I915_WRITE(CACHE_MODE_1,
7030                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7031
7032         /*
7033          * BSpec recommends 8x4 when MSAA is used,
7034          * however in practice 16x4 seems fastest.
7035          *
7036          * Note that PS/WM thread counts depend on the WIZ hashing
7037          * disable bit, which we don't touch here, but it's good
7038          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7039          */
7040         I915_WRITE(GEN7_GT_MODE,
7041                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7042
7043         /*
7044          * WaIncreaseL3CreditsForVLVB0:vlv
7045          * This is the hardware default actually.
7046          */
7047         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
7048
7049         /*
7050          * WaDisableVLVClockGating_VBIIssue:vlv
7051          * Disable clock gating on the GCFG unit to prevent a delay
7052          * in the reporting of vblank events.
7053          */
7054         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
7055 }
7056
7057 static void cherryview_init_clock_gating(struct drm_device *dev)
7058 {
7059         struct drm_i915_private *dev_priv = dev->dev_private;
7060
7061         /* WaVSRefCountFullforceMissDisable:chv */
7062         /* WaDSRefCountFullforceMissDisable:chv */
7063         I915_WRITE(GEN7_FF_THREAD_MODE,
7064                    I915_READ(GEN7_FF_THREAD_MODE) &
7065                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
7066
7067         /* WaDisableSemaphoreAndSyncFlipWait:chv */
7068         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
7069                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
7070
7071         /* WaDisableCSUnitClockGating:chv */
7072         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
7073                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
7074
7075         /* WaDisableSDEUnitClockGating:chv */
7076         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
7077                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
7078
7079         /*
7080          * GTT cache may not work with big pages, so if those
7081          * are ever enabled GTT cache may need to be disabled.
7082          */
7083         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
7084 }
7085
7086 static void g4x_init_clock_gating(struct drm_device *dev)
7087 {
7088         struct drm_i915_private *dev_priv = dev->dev_private;
7089         uint32_t dspclk_gate;
7090
7091         I915_WRITE(RENCLK_GATE_D1, 0);
7092         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
7093                    GS_UNIT_CLOCK_GATE_DISABLE |
7094                    CL_UNIT_CLOCK_GATE_DISABLE);
7095         I915_WRITE(RAMCLK_GATE_D, 0);
7096         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
7097                 OVRUNIT_CLOCK_GATE_DISABLE |
7098                 OVCUNIT_CLOCK_GATE_DISABLE;
7099         if (IS_GM45(dev))
7100                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
7101         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
7102
7103         /* WaDisableRenderCachePipelinedFlush */
7104         I915_WRITE(CACHE_MODE_0,
7105                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
7106
7107         /* WaDisable_RenderCache_OperationalFlush:g4x */
7108         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7109
7110         g4x_disable_trickle_feed(dev);
7111 }
7112
7113 static void crestline_init_clock_gating(struct drm_device *dev)
7114 {
7115         struct drm_i915_private *dev_priv = dev->dev_private;
7116
7117         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
7118         I915_WRITE(RENCLK_GATE_D2, 0);
7119         I915_WRITE(DSPCLK_GATE_D, 0);
7120         I915_WRITE(RAMCLK_GATE_D, 0);
7121         I915_WRITE16(DEUC, 0);
7122         I915_WRITE(MI_ARB_STATE,
7123                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7124
7125         /* WaDisable_RenderCache_OperationalFlush:gen4 */
7126         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7127 }
7128
7129 static void broadwater_init_clock_gating(struct drm_device *dev)
7130 {
7131         struct drm_i915_private *dev_priv = dev->dev_private;
7132
7133         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
7134                    I965_RCC_CLOCK_GATE_DISABLE |
7135                    I965_RCPB_CLOCK_GATE_DISABLE |
7136                    I965_ISC_CLOCK_GATE_DISABLE |
7137                    I965_FBC_CLOCK_GATE_DISABLE);
7138         I915_WRITE(RENCLK_GATE_D2, 0);
7139         I915_WRITE(MI_ARB_STATE,
7140                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7141
7142         /* WaDisable_RenderCache_OperationalFlush:gen4 */
7143         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7144 }
7145
7146 static void gen3_init_clock_gating(struct drm_device *dev)
7147 {
7148         struct drm_i915_private *dev_priv = dev->dev_private;
7149         u32 dstate = I915_READ(D_STATE);
7150
7151         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
7152                 DSTATE_DOT_CLOCK_GATING;
7153         I915_WRITE(D_STATE, dstate);
7154
7155         if (IS_PINEVIEW(dev))
7156                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
7157
7158         /* If set, IIR "flip pending" would signal flip *done*, so keep it clear */
7159         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
7160
7161         /* interrupts should cause a wake up from C3 */
7162         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
7163
7164         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
7165         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
7166
7167         I915_WRITE(MI_ARB_STATE,
7168                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7169 }
7170
7171 static void i85x_init_clock_gating(struct drm_device *dev)
7172 {
7173         struct drm_i915_private *dev_priv = dev->dev_private;
7174
7175         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
7176
7177         /* interrupts should cause a wake up from C3 */
7178         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
7179                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
7180
7181         I915_WRITE(MEM_MODE,
7182                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
7183 }
7184
7185 static void i830_init_clock_gating(struct drm_device *dev)
7186 {
7187         struct drm_i915_private *dev_priv = dev->dev_private;
7188
7189         I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
7190
7191         I915_WRITE(MEM_MODE,
7192                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
7193                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
7194 }
7195
7196 void intel_init_clock_gating(struct drm_device *dev)
7197 {
7198         struct drm_i915_private *dev_priv = dev->dev_private;
7199
7200         dev_priv->display.init_clock_gating(dev);
7201 }
7202
7203 void intel_suspend_hw(struct drm_device *dev)
7204 {
7205         if (HAS_PCH_LPT(dev))
7206                 lpt_suspend_hw(dev);
7207 }
7208
7209 static void nop_init_clock_gating(struct drm_device *dev)
7210 {
7211         DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
7212 }
7213
7214 /**
7215  * intel_init_clock_gating_hooks - setup the clock gating hooks
7216  * @dev_priv: device private
7217  *
7218  * Setup the hooks that configure which clocks of a given platform can be
7219  * gated and also apply various GT and display specific workarounds for these
7220  * platforms. Note that some GT specific workarounds are applied separately
7221  * when GPU contexts or batchbuffers start their execution.
7222  */
7223 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
7224 {
7225         if (IS_SKYLAKE(dev_priv))
7226                 dev_priv->display.init_clock_gating = skylake_init_clock_gating;
7227         else if (IS_KABYLAKE(dev_priv))
7228                 dev_priv->display.init_clock_gating = kabylake_init_clock_gating;
7229         else if (IS_BROXTON(dev_priv))
7230                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
7231         else if (IS_BROADWELL(dev_priv))
7232                 dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
7233         else if (IS_CHERRYVIEW(dev_priv))
7234                 dev_priv->display.init_clock_gating = cherryview_init_clock_gating;
7235         else if (IS_HASWELL(dev_priv))
7236                 dev_priv->display.init_clock_gating = haswell_init_clock_gating;
7237         else if (IS_IVYBRIDGE(dev_priv))
7238                 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
7239         else if (IS_VALLEYVIEW(dev_priv))
7240                 dev_priv->display.init_clock_gating = valleyview_init_clock_gating;
7241         else if (IS_GEN6(dev_priv))
7242                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
7243         else if (IS_GEN5(dev_priv))
7244                 dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
7245         else if (IS_G4X(dev_priv))
7246                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
7247         else if (IS_CRESTLINE(dev_priv))
7248                 dev_priv->display.init_clock_gating = crestline_init_clock_gating;
7249         else if (IS_BROADWATER(dev_priv))
7250                 dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
7251         else if (IS_GEN3(dev_priv))
7252                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7253         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
7254                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
7255         else if (IS_GEN2(dev_priv))
7256                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
7257         else {
7258                 MISSING_CASE(INTEL_DEVID(dev_priv));
7259                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
7260         }
7261 }
7262
7263 /* Set up chip specific power management-related functions */
7264 void intel_init_pm(struct drm_device *dev)
7265 {
7266         struct drm_i915_private *dev_priv = dev->dev_private;
7267
7268         intel_fbc_init(dev_priv);
7269
7270         /* For CxSR */
7271         if (IS_PINEVIEW(dev))
7272                 i915_pineview_get_mem_freq(dev);
7273         else if (IS_GEN5(dev))
7274                 i915_ironlake_get_mem_freq(dev);
7275
7276         /* For FIFO watermark updates */
7277         if (INTEL_INFO(dev)->gen >= 9) {
7278                 skl_setup_wm_latency(dev);
7279                 dev_priv->display.update_wm = skl_update_wm;
7280         } else if (HAS_PCH_SPLIT(dev)) {
7281                 ilk_setup_wm_latency(dev);
7282
7283                 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
7284                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
7285                     (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
7286                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
7287                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
7288                         dev_priv->display.compute_intermediate_wm =
7289                                 ilk_compute_intermediate_wm;
7290                         dev_priv->display.initial_watermarks =
7291                                 ilk_initial_watermarks;
7292                         dev_priv->display.optimize_watermarks =
7293                                 ilk_optimize_watermarks;
7294                 } else {
7295                         DRM_DEBUG_KMS("Failed to read display plane latency. "
7296                                       "Disabling CxSR\n");
7297                 }
7298         } else if (IS_CHERRYVIEW(dev) || IS_VALLEYVIEW(dev)) {
7299                 vlv_setup_wm_latency(dev);
7300                 dev_priv->display.update_wm = vlv_update_wm;
7304         } else if (IS_PINEVIEW(dev)) {
7305                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
7306                                             dev_priv->is_ddr3,
7307                                             dev_priv->fsb_freq,
7308                                             dev_priv->mem_freq)) {
7309                         DRM_INFO("failed to find known CxSR latency "
7310                                  "(found ddr%s fsb freq %d, mem freq %d), "
7311                                  "disabling CxSR\n",
7312                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
7313                                  dev_priv->fsb_freq, dev_priv->mem_freq);
7314                         /* Disable CxSR and never update its watermark again */
7315                         intel_set_memory_cxsr(dev_priv, false);
7316                         dev_priv->display.update_wm = NULL;
7317                 } else
7318                         dev_priv->display.update_wm = pineview_update_wm;
7319         } else if (IS_G4X(dev)) {
7320                 dev_priv->display.update_wm = g4x_update_wm;
7321         } else if (IS_GEN4(dev)) {
7322                 dev_priv->display.update_wm = i965_update_wm;
7323         } else if (IS_GEN3(dev)) {
7324                 dev_priv->display.update_wm = i9xx_update_wm;
7325                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
7326         } else if (IS_GEN2(dev)) {
7327                 if (INTEL_INFO(dev)->num_pipes == 1) {
7328                         dev_priv->display.update_wm = i845_update_wm;
7329                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
7330                 } else {
7331                         dev_priv->display.update_wm = i9xx_update_wm;
7332                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
7333                 }
7334         } else {
7335                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
7336         }
7337 }
7338
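     /*
      * GT "pcode" mailbox protocol (caller must hold rps.hw_lock): write the
      * data register(s), then kick the mailbox with GEN6_PCODE_READY | mbox;
      * pcode clears the READY bit once it has consumed the request.
      */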
7339 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
7340 {
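             /* *val carries the request word in and pcode's reply out */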
7341         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7342
7343         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7344                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
7345                 return -EAGAIN;
7346         }
7347
7348         I915_WRITE(GEN6_PCODE_DATA, *val);
7349         I915_WRITE(GEN6_PCODE_DATA1, 0);
7350         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7351
7352         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7353                      500)) {
7354                 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
7355                 return -ETIMEDOUT;
7356         }
7357
7358         *val = I915_READ(GEN6_PCODE_DATA);
7359         I915_WRITE(GEN6_PCODE_DATA, 0);
7360
7361         return 0;
7362 }
7363
7364 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
7365 {
7366         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7367
7368         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7369                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
7370                 return -EAGAIN;
7371         }
7372
7373         I915_WRITE(GEN6_PCODE_DATA, val);
7374         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7375
7376         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7377                      500)) {
7378                 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
7379                 return -ETIMEDOUT;
7380         }
7381
7382         I915_WRITE(GEN6_PCODE_DATA, 0);
7383
7384         return 0;
7385 }
7386
7387 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
7388 {
7389         /*
7390          * N = val - 0xb7
7391          * Slow = Fast = GPLL ref * N
7392          */
7393         return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000);
7394 }
7395
7396 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
7397 {
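             /* Inverse of byt_gpu_freq(): MHz back to a GPLL opcode */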
7398         return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7;
7399 }
7400
7401 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
7402 {
7403         /*
7404          * N = val / 2
7405          * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
7406          */
7407         return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000);
7408 }
7409
7410 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
7411 {
7412         /* Inverse of chv_gpu_freq(); CHV opcodes must be even, so round to the nearest even value */
7413         return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2;
7414 }
7415
7416 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
7417 {
7418         if (IS_GEN9(dev_priv))
7419                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
7420                                          GEN9_FREQ_SCALER);
7421         else if (IS_CHERRYVIEW(dev_priv))
7422                 return chv_gpu_freq(dev_priv, val);
7423         else if (IS_VALLEYVIEW(dev_priv))
7424                 return byt_gpu_freq(dev_priv, val);
7425         else
7426                 return val * GT_FREQUENCY_MULTIPLIER;
7427 }
7428
7429 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
7430 {
7431         if (IS_GEN9(dev_priv))
7432                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
7433                                          GT_FREQUENCY_MULTIPLIER);
7434         else if (IS_CHERRYVIEW(dev_priv))
7435                 return chv_freq_opcode(dev_priv, val);
7436         else if (IS_VALLEYVIEW(dev_priv))
7437                 return byt_freq_opcode(dev_priv, val);
7438         else
7439                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
7440 }
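     /*
      * Worked example (assuming the usual GT_FREQUENCY_MULTIPLIER of 50):
      * on e.g. HSW, intel_gpu_freq(dev_priv, 16) == 800 MHz and
      * intel_freq_opcode(dev_priv, 800) == 16; gen9 scales the same 50 MHz
      * unit down by GEN9_FREQ_SCALER for its finer-grained opcodes.
      */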
7441
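     /*
      * Deferred RPS boost: a boost can be requested from atomic context
      * (note the GFP_ATOMIC allocation below), so the actual frequency bump
      * is done by a worker that holds a request reference until it runs.
      */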
7442 struct request_boost {
7443         struct work_struct work;
7444         struct drm_i915_gem_request *req;
7445 };
7446
7447 static void __intel_rps_boost_work(struct work_struct *work)
7448 {
7449         struct request_boost *boost = container_of(work, struct request_boost, work);
7450         struct drm_i915_gem_request *req = boost->req;
7451
7452         if (!i915_gem_request_completed(req, true))
7453                 gen6_rps_boost(to_i915(req->engine->dev), NULL,
7454                                req->emitted_jiffies);
7455
7456         i915_gem_request_unreference__unlocked(req);
7457         kfree(boost);
7458 }
7459
7460 void intel_queue_rps_boost_for_request(struct drm_device *dev,
7461                                        struct drm_i915_gem_request *req)
7462 {
7463         struct request_boost *boost;
7464
7465         if (req == NULL || INTEL_INFO(dev)->gen < 6)
7466                 return;
7467
7468         if (i915_gem_request_completed(req, true))
7469                 return;
7470
7471         boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
7472         if (boost == NULL)
7473                 return;
7474
7475         i915_gem_request_reference(req);
7476         boost->req = req;
7477
7478         INIT_WORK(&boost->work, __intel_rps_boost_work);
7479         queue_work(to_i915(dev)->wq, &boost->work);
7480 }
7481
7482 void intel_pm_setup(struct drm_device *dev)
7483 {
7484         struct drm_i915_private *dev_priv = dev->dev_private;
7485
7486         mutex_init(&dev_priv->rps.hw_lock);
7487         spin_lock_init(&dev_priv->rps.client_lock);
7488
7489         INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
7490                           intel_gen6_powersave_work);
7491         INIT_LIST_HEAD(&dev_priv->rps.clients);
7492         INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
7493         INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
7494
7495         dev_priv->pm.suspended = false;
7496         atomic_set(&dev_priv->pm.wakeref_count, 0);
7497         atomic_set(&dev_priv->pm.atomic_seq, 0);
7498 }