1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142                                           bool enable);
143
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155                                   u32 reg, u32 *val)
156 {
157         switch (reg) {
158         case GRBM_STATUS:
159         case GRBM_STATUS2:
160         case GRBM_STATUS_SE0:
161         case GRBM_STATUS_SE1:
162         case GRBM_STATUS_SE2:
163         case GRBM_STATUS_SE3:
164         case SRBM_STATUS:
165         case SRBM_STATUS2:
166         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168         case UVD_STATUS:
169         /* TODO VCE */
170                 *val = RREG32(reg);
171                 return 0;
172         default:
173                 return -EINVAL;
174         }
175 }
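/*
 * Minimal usage sketch (this assumes the RADEON_INFO_READ_REG path in
 * radeon_kms.c, which is not part of this excerpt): the info ioctl only
 * reads a register on behalf of userspace after this whitelist accepts it:
 *
 *	u32 reg_val;
 *
 *	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &reg_val))
 *		return -EINVAL;
 */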
176
177 /*
178  * Indirect register accessors (DIDT block)
179  */
180 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181 {
182         unsigned long flags;
183         u32 r;
184
185         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186         WREG32(CIK_DIDT_IND_INDEX, (reg));
187         r = RREG32(CIK_DIDT_IND_DATA);
188         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189         return r;
190 }
191
192 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193 {
194         unsigned long flags;
195
196         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197         WREG32(CIK_DIDT_IND_INDEX, (reg));
198         WREG32(CIK_DIDT_IND_DATA, (v));
199         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200 }
201
202 /* get temperature in millidegrees */
203 int ci_get_temp(struct radeon_device *rdev)
204 {
205         u32 temp;
206         int actual_temp = 0;
207
208         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209                 CTF_TEMP_SHIFT;
210
211         if (temp & 0x200)
212                 actual_temp = 255;
213         else
214                 actual_temp = temp & 0x1ff;
215
216         actual_temp = actual_temp * 1000;
217
218         return actual_temp;
219 }
220
221 /* get temperature in millidegrees */
222 int kv_get_temp(struct radeon_device *rdev)
223 {
224         u32 temp;
225         int actual_temp = 0;
226
227         temp = RREG32_SMC(0xC0300E0C);
228
229         if (temp)
230                 actual_temp = (temp / 8) - 49;
231         else
232                 actual_temp = 0;
233
234         actual_temp = actual_temp * 1000;
235
236         return actual_temp;
237 }
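/*
 * Worked example for the conversion above (an illustrative raw value, not
 * from hardware docs): an SMC reading of 496 gives 496 / 8 - 49 = 13
 * degrees C, returned as 13000 millidegrees.
 */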
238
239 /*
240  * Indirect register accessors (PCIE port)
241  */
242 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243 {
244         unsigned long flags;
245         u32 r;
246
247         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248         WREG32(PCIE_INDEX, reg);
249         (void)RREG32(PCIE_INDEX);
250         r = RREG32(PCIE_DATA);
251         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252         return r;
253 }
254
255 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256 {
257         unsigned long flags;
258
259         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260         WREG32(PCIE_INDEX, reg);
261         (void)RREG32(PCIE_INDEX);
262         WREG32(PCIE_DATA, v);
263         (void)RREG32(PCIE_DATA);
264         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265 }
266
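/*
 * RLC save/restore register lists, one per GPU variant ("spectre" is the
 * Kaveri GFX block, "kalindi" the Kabini/Mullins one). The layout noted here
 * is inferred from the values below rather than documented: each entry
 * appears to pack an SE/instance selector in the upper 16 bits and a
 * register dword offset (byte offset >> 2) in the lower 16 bits, followed by
 * a zero placeholder filled in at save time, while the bare counts (0x3,
 * 0x5) seem to introduce groups of indexed registers. The list is copied
 * verbatim into the RLC save/restore buffer via rdev->rlc.reg_list in
 * sumo_rlc_init().
 */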
267 static const u32 spectre_rlc_save_restore_register_list[] =
268 {
269         (0x0e00 << 16) | (0xc12c >> 2),
270         0x00000000,
271         (0x0e00 << 16) | (0xc140 >> 2),
272         0x00000000,
273         (0x0e00 << 16) | (0xc150 >> 2),
274         0x00000000,
275         (0x0e00 << 16) | (0xc15c >> 2),
276         0x00000000,
277         (0x0e00 << 16) | (0xc168 >> 2),
278         0x00000000,
279         (0x0e00 << 16) | (0xc170 >> 2),
280         0x00000000,
281         (0x0e00 << 16) | (0xc178 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc204 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc2b4 >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc2b8 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc2bc >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc2c0 >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0x8228 >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0x829c >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0x869c >> 2),
298         0x00000000,
299         (0x0600 << 16) | (0x98f4 >> 2),
300         0x00000000,
301         (0x0e00 << 16) | (0x98f8 >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0x9900 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0xc260 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0x90e8 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0x3c000 >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0x3c00c >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0x8c1c >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0x9700 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xcd20 >> 2),
318         0x00000000,
319         (0x4e00 << 16) | (0xcd20 >> 2),
320         0x00000000,
321         (0x5e00 << 16) | (0xcd20 >> 2),
322         0x00000000,
323         (0x6e00 << 16) | (0xcd20 >> 2),
324         0x00000000,
325         (0x7e00 << 16) | (0xcd20 >> 2),
326         0x00000000,
327         (0x8e00 << 16) | (0xcd20 >> 2),
328         0x00000000,
329         (0x9e00 << 16) | (0xcd20 >> 2),
330         0x00000000,
331         (0xae00 << 16) | (0xcd20 >> 2),
332         0x00000000,
333         (0xbe00 << 16) | (0xcd20 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0x89bc >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x8900 >> 2),
338         0x00000000,
339         0x3,
340         (0x0e00 << 16) | (0xc130 >> 2),
341         0x00000000,
342         (0x0e00 << 16) | (0xc134 >> 2),
343         0x00000000,
344         (0x0e00 << 16) | (0xc1fc >> 2),
345         0x00000000,
346         (0x0e00 << 16) | (0xc208 >> 2),
347         0x00000000,
348         (0x0e00 << 16) | (0xc264 >> 2),
349         0x00000000,
350         (0x0e00 << 16) | (0xc268 >> 2),
351         0x00000000,
352         (0x0e00 << 16) | (0xc26c >> 2),
353         0x00000000,
354         (0x0e00 << 16) | (0xc270 >> 2),
355         0x00000000,
356         (0x0e00 << 16) | (0xc274 >> 2),
357         0x00000000,
358         (0x0e00 << 16) | (0xc278 >> 2),
359         0x00000000,
360         (0x0e00 << 16) | (0xc27c >> 2),
361         0x00000000,
362         (0x0e00 << 16) | (0xc280 >> 2),
363         0x00000000,
364         (0x0e00 << 16) | (0xc284 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc288 >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0xc28c >> 2),
369         0x00000000,
370         (0x0e00 << 16) | (0xc290 >> 2),
371         0x00000000,
372         (0x0e00 << 16) | (0xc294 >> 2),
373         0x00000000,
374         (0x0e00 << 16) | (0xc298 >> 2),
375         0x00000000,
376         (0x0e00 << 16) | (0xc29c >> 2),
377         0x00000000,
378         (0x0e00 << 16) | (0xc2a0 >> 2),
379         0x00000000,
380         (0x0e00 << 16) | (0xc2a4 >> 2),
381         0x00000000,
382         (0x0e00 << 16) | (0xc2a8 >> 2),
383         0x00000000,
384         (0x0e00 << 16) | (0xc2ac >> 2),
385         0x00000000,
386         (0x0e00 << 16) | (0xc2b0 >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x301d0 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x30238 >> 2),
391         0x00000000,
392         (0x0e00 << 16) | (0x30250 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x30254 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x30258 >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x3025c >> 2),
399         0x00000000,
400         (0x4e00 << 16) | (0xc900 >> 2),
401         0x00000000,
402         (0x5e00 << 16) | (0xc900 >> 2),
403         0x00000000,
404         (0x6e00 << 16) | (0xc900 >> 2),
405         0x00000000,
406         (0x7e00 << 16) | (0xc900 >> 2),
407         0x00000000,
408         (0x8e00 << 16) | (0xc900 >> 2),
409         0x00000000,
410         (0x9e00 << 16) | (0xc900 >> 2),
411         0x00000000,
412         (0xae00 << 16) | (0xc900 >> 2),
413         0x00000000,
414         (0xbe00 << 16) | (0xc900 >> 2),
415         0x00000000,
416         (0x4e00 << 16) | (0xc904 >> 2),
417         0x00000000,
418         (0x5e00 << 16) | (0xc904 >> 2),
419         0x00000000,
420         (0x6e00 << 16) | (0xc904 >> 2),
421         0x00000000,
422         (0x7e00 << 16) | (0xc904 >> 2),
423         0x00000000,
424         (0x8e00 << 16) | (0xc904 >> 2),
425         0x00000000,
426         (0x9e00 << 16) | (0xc904 >> 2),
427         0x00000000,
428         (0xae00 << 16) | (0xc904 >> 2),
429         0x00000000,
430         (0xbe00 << 16) | (0xc904 >> 2),
431         0x00000000,
432         (0x4e00 << 16) | (0xc908 >> 2),
433         0x00000000,
434         (0x5e00 << 16) | (0xc908 >> 2),
435         0x00000000,
436         (0x6e00 << 16) | (0xc908 >> 2),
437         0x00000000,
438         (0x7e00 << 16) | (0xc908 >> 2),
439         0x00000000,
440         (0x8e00 << 16) | (0xc908 >> 2),
441         0x00000000,
442         (0x9e00 << 16) | (0xc908 >> 2),
443         0x00000000,
444         (0xae00 << 16) | (0xc908 >> 2),
445         0x00000000,
446         (0xbe00 << 16) | (0xc908 >> 2),
447         0x00000000,
448         (0x4e00 << 16) | (0xc90c >> 2),
449         0x00000000,
450         (0x5e00 << 16) | (0xc90c >> 2),
451         0x00000000,
452         (0x6e00 << 16) | (0xc90c >> 2),
453         0x00000000,
454         (0x7e00 << 16) | (0xc90c >> 2),
455         0x00000000,
456         (0x8e00 << 16) | (0xc90c >> 2),
457         0x00000000,
458         (0x9e00 << 16) | (0xc90c >> 2),
459         0x00000000,
460         (0xae00 << 16) | (0xc90c >> 2),
461         0x00000000,
462         (0xbe00 << 16) | (0xc90c >> 2),
463         0x00000000,
464         (0x4e00 << 16) | (0xc910 >> 2),
465         0x00000000,
466         (0x5e00 << 16) | (0xc910 >> 2),
467         0x00000000,
468         (0x6e00 << 16) | (0xc910 >> 2),
469         0x00000000,
470         (0x7e00 << 16) | (0xc910 >> 2),
471         0x00000000,
472         (0x8e00 << 16) | (0xc910 >> 2),
473         0x00000000,
474         (0x9e00 << 16) | (0xc910 >> 2),
475         0x00000000,
476         (0xae00 << 16) | (0xc910 >> 2),
477         0x00000000,
478         (0xbe00 << 16) | (0xc910 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0xc99c >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x9834 >> 2),
483         0x00000000,
484         (0x0000 << 16) | (0x30f00 >> 2),
485         0x00000000,
486         (0x0001 << 16) | (0x30f00 >> 2),
487         0x00000000,
488         (0x0000 << 16) | (0x30f04 >> 2),
489         0x00000000,
490         (0x0001 << 16) | (0x30f04 >> 2),
491         0x00000000,
492         (0x0000 << 16) | (0x30f08 >> 2),
493         0x00000000,
494         (0x0001 << 16) | (0x30f08 >> 2),
495         0x00000000,
496         (0x0000 << 16) | (0x30f0c >> 2),
497         0x00000000,
498         (0x0001 << 16) | (0x30f0c >> 2),
499         0x00000000,
500         (0x0600 << 16) | (0x9b7c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0x8a14 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0x8a18 >> 2),
505         0x00000000,
506         (0x0600 << 16) | (0x30a00 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0x8bf0 >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0x8bcc >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0x8b24 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0x30a04 >> 2),
515         0x00000000,
516         (0x0600 << 16) | (0x30a10 >> 2),
517         0x00000000,
518         (0x0600 << 16) | (0x30a14 >> 2),
519         0x00000000,
520         (0x0600 << 16) | (0x30a18 >> 2),
521         0x00000000,
522         (0x0600 << 16) | (0x30a2c >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0xc700 >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0xc704 >> 2),
527         0x00000000,
528         (0x0e00 << 16) | (0xc708 >> 2),
529         0x00000000,
530         (0x0e00 << 16) | (0xc768 >> 2),
531         0x00000000,
532         (0x0400 << 16) | (0xc770 >> 2),
533         0x00000000,
534         (0x0400 << 16) | (0xc774 >> 2),
535         0x00000000,
536         (0x0400 << 16) | (0xc778 >> 2),
537         0x00000000,
538         (0x0400 << 16) | (0xc77c >> 2),
539         0x00000000,
540         (0x0400 << 16) | (0xc780 >> 2),
541         0x00000000,
542         (0x0400 << 16) | (0xc784 >> 2),
543         0x00000000,
544         (0x0400 << 16) | (0xc788 >> 2),
545         0x00000000,
546         (0x0400 << 16) | (0xc78c >> 2),
547         0x00000000,
548         (0x0400 << 16) | (0xc798 >> 2),
549         0x00000000,
550         (0x0400 << 16) | (0xc79c >> 2),
551         0x00000000,
552         (0x0400 << 16) | (0xc7a0 >> 2),
553         0x00000000,
554         (0x0400 << 16) | (0xc7a4 >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0xc7a8 >> 2),
557         0x00000000,
558         (0x0400 << 16) | (0xc7ac >> 2),
559         0x00000000,
560         (0x0400 << 16) | (0xc7b0 >> 2),
561         0x00000000,
562         (0x0400 << 16) | (0xc7b4 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x9100 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x3c010 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x92a8 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x92ac >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x92b4 >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x92b8 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x92bc >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x92c0 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x92c4 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x92c8 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x92cc >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x92d0 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x8c00 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8c04 >> 2),
591         0x00000000,
592         (0x0e00 << 16) | (0x8c20 >> 2),
593         0x00000000,
594         (0x0e00 << 16) | (0x8c38 >> 2),
595         0x00000000,
596         (0x0e00 << 16) | (0x8c3c >> 2),
597         0x00000000,
598         (0x0e00 << 16) | (0xae00 >> 2),
599         0x00000000,
600         (0x0e00 << 16) | (0x9604 >> 2),
601         0x00000000,
602         (0x0e00 << 16) | (0xac08 >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xac0c >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xac10 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xac14 >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xac58 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xac68 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xac6c >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xac70 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xac74 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xac78 >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xac7c >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0xac80 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0xac84 >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0xac88 >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0xac8c >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x970c >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9714 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0x9718 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x971c >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x31068 >> 2),
641         0x00000000,
642         (0x4e00 << 16) | (0x31068 >> 2),
643         0x00000000,
644         (0x5e00 << 16) | (0x31068 >> 2),
645         0x00000000,
646         (0x6e00 << 16) | (0x31068 >> 2),
647         0x00000000,
648         (0x7e00 << 16) | (0x31068 >> 2),
649         0x00000000,
650         (0x8e00 << 16) | (0x31068 >> 2),
651         0x00000000,
652         (0x9e00 << 16) | (0x31068 >> 2),
653         0x00000000,
654         (0xae00 << 16) | (0x31068 >> 2),
655         0x00000000,
656         (0xbe00 << 16) | (0x31068 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0xcd10 >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0xcd14 >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0x88b0 >> 2),
663         0x00000000,
664         (0x0e00 << 16) | (0x88b4 >> 2),
665         0x00000000,
666         (0x0e00 << 16) | (0x88b8 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0x88bc >> 2),
669         0x00000000,
670         (0x0400 << 16) | (0x89c0 >> 2),
671         0x00000000,
672         (0x0e00 << 16) | (0x88c4 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0x88c8 >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0x88d0 >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0x88d4 >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0x88d8 >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0x8980 >> 2),
683         0x00000000,
684         (0x0e00 << 16) | (0x30938 >> 2),
685         0x00000000,
686         (0x0e00 << 16) | (0x3093c >> 2),
687         0x00000000,
688         (0x0e00 << 16) | (0x30940 >> 2),
689         0x00000000,
690         (0x0e00 << 16) | (0x89a0 >> 2),
691         0x00000000,
692         (0x0e00 << 16) | (0x30900 >> 2),
693         0x00000000,
694         (0x0e00 << 16) | (0x30904 >> 2),
695         0x00000000,
696         (0x0e00 << 16) | (0x89b4 >> 2),
697         0x00000000,
698         (0x0e00 << 16) | (0x3c210 >> 2),
699         0x00000000,
700         (0x0e00 << 16) | (0x3c214 >> 2),
701         0x00000000,
702         (0x0e00 << 16) | (0x3c218 >> 2),
703         0x00000000,
704         (0x0e00 << 16) | (0x8904 >> 2),
705         0x00000000,
706         0x5,
707         (0x0e00 << 16) | (0x8c28 >> 2),
708         (0x0e00 << 16) | (0x8c2c >> 2),
709         (0x0e00 << 16) | (0x8c30 >> 2),
710         (0x0e00 << 16) | (0x8c34 >> 2),
711         (0x0e00 << 16) | (0x9600 >> 2),
712 };
713
714 static const u32 kalindi_rlc_save_restore_register_list[] =
715 {
716         (0x0e00 << 16) | (0xc12c >> 2),
717         0x00000000,
718         (0x0e00 << 16) | (0xc140 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc150 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc15c >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc168 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc170 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc204 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc2b4 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b8 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2bc >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2c0 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0x8228 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x829c >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x869c >> 2),
743         0x00000000,
744         (0x0600 << 16) | (0x98f4 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x98f8 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x9900 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc260 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x90e8 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x3c000 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c00c >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x8c1c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x9700 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xcd20 >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0x89bc >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x8900 >> 2),
775         0x00000000,
776         0x3,
777         (0x0e00 << 16) | (0xc130 >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0xc134 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc1fc >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc208 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc264 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc268 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc26c >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc270 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc274 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc28c >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc290 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc294 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc298 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc2a0 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a4 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a8 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2ac >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x301d0 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x30238 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30250 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30254 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30258 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x3025c >> 2),
822         0x00000000,
823         (0x4e00 << 16) | (0xc900 >> 2),
824         0x00000000,
825         (0x5e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x6e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x7e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x4e00 << 16) | (0xc904 >> 2),
832         0x00000000,
833         (0x5e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x6e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x7e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x4e00 << 16) | (0xc908 >> 2),
840         0x00000000,
841         (0x5e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x6e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x7e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x4e00 << 16) | (0xc90c >> 2),
848         0x00000000,
849         (0x5e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x6e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x7e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x4e00 << 16) | (0xc910 >> 2),
856         0x00000000,
857         (0x5e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x6e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x7e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0xc99c >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x9834 >> 2),
866         0x00000000,
867         (0x0000 << 16) | (0x30f00 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f04 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f08 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f0c >> 2),
874         0x00000000,
875         (0x0600 << 16) | (0x9b7c >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x8a14 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a18 >> 2),
880         0x00000000,
881         (0x0600 << 16) | (0x30a00 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8bf0 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bcc >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8b24 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x30a04 >> 2),
890         0x00000000,
891         (0x0600 << 16) | (0x30a10 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a14 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a18 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a2c >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0xc700 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc704 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc708 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc768 >> 2),
906         0x00000000,
907         (0x0400 << 16) | (0xc770 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc774 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc798 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc79c >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x9100 >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x3c010 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x8c00 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c04 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c20 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c38 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c3c >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0xae00 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x9604 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xac08 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac0c >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac10 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac14 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac58 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac68 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac6c >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac70 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac74 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac78 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac7c >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac80 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac84 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac88 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac8c >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0x970c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x9714 >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9718 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x971c >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x31068 >> 2),
972         0x00000000,
973         (0x4e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x5e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x6e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x7e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x0e00 << 16) | (0xcd10 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd14 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0x88b0 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b4 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b8 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88bc >> 2),
992         0x00000000,
993         (0x0400 << 16) | (0x89c0 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88c4 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c8 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88d0 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d4 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d8 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x8980 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x30938 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x3093c >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30940 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x89a0 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30900 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30904 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x89b4 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x3e1fc >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3c210 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c214 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c218 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x8904 >> 2),
1030         0x00000000,
1031         0x5,
1032         (0x0e00 << 16) | (0x8c28 >> 2),
1033         (0x0e00 << 16) | (0x8c2c >> 2),
1034         (0x0e00 << 16) | (0x8c30 >> 2),
1035         (0x0e00 << 16) | (0x8c34 >> 2),
1036         (0x0e00 << 16) | (0x9600 >> 2),
1037 };
1038
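/*
 * "Golden" register tables: { register offset, and_mask, or_mask } triplets
 * applied by radeon_program_register_sequence(). When and_mask is 0xffffffff
 * the value is written directly; otherwise the register is read, the
 * and_mask bits are cleared and or_mask is or-ed in. The tables are hooked
 * up in cik_init_golden_registers(), which lies outside this excerpt.
 */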
1039 static const u32 bonaire_golden_spm_registers[] =
1040 {
1041         0x30800, 0xe0ffffff, 0xe0000000
1042 };
1043
1044 static const u32 bonaire_golden_common_registers[] =
1045 {
1046         0xc770, 0xffffffff, 0x00000800,
1047         0xc774, 0xffffffff, 0x00000800,
1048         0xc798, 0xffffffff, 0x00007fbf,
1049         0xc79c, 0xffffffff, 0x00007faf
1050 };
1051
1052 static const u32 bonaire_golden_registers[] =
1053 {
1054         0x3354, 0x00000333, 0x00000333,
1055         0x3350, 0x000c0fc0, 0x00040200,
1056         0x9a10, 0x00010000, 0x00058208,
1057         0x3c000, 0xffff1fff, 0x00140000,
1058         0x3c200, 0xfdfc0fff, 0x00000100,
1059         0x3c234, 0x40000000, 0x40000200,
1060         0x9830, 0xffffffff, 0x00000000,
1061         0x9834, 0xf00fffff, 0x00000400,
1062         0x9838, 0x0002021c, 0x00020200,
1063         0xc78, 0x00000080, 0x00000000,
1064         0x5bb0, 0x000000f0, 0x00000070,
1065         0x5bc0, 0xf0311fff, 0x80300000,
1066         0x98f8, 0x73773777, 0x12010001,
1067         0x350c, 0x00810000, 0x408af000,
1068         0x7030, 0x31000111, 0x00000011,
1069         0x2f48, 0x73773777, 0x12010001,
1070         0x220c, 0x00007fb6, 0x0021a1b1,
1071         0x2210, 0x00007fb6, 0x002021b1,
1072         0x2180, 0x00007fb6, 0x00002191,
1073         0x2218, 0x00007fb6, 0x002121b1,
1074         0x221c, 0x00007fb6, 0x002021b1,
1075         0x21dc, 0x00007fb6, 0x00002191,
1076         0x21e0, 0x00007fb6, 0x00002191,
1077         0x3628, 0x0000003f, 0x0000000a,
1078         0x362c, 0x0000003f, 0x0000000a,
1079         0x2ae4, 0x00073ffe, 0x000022a2,
1080         0x240c, 0x000007ff, 0x00000000,
1081         0x8a14, 0xf000003f, 0x00000007,
1082         0x8bf0, 0x00002001, 0x00000001,
1083         0x8b24, 0xffffffff, 0x00ffffff,
1084         0x30a04, 0x0000ff0f, 0x00000000,
1085         0x28a4c, 0x07ffffff, 0x06000000,
1086         0x4d8, 0x00000fff, 0x00000100,
1087         0x3e78, 0x00000001, 0x00000002,
1088         0x9100, 0x03000000, 0x0362c688,
1089         0x8c00, 0x000000ff, 0x00000001,
1090         0xe40, 0x00001fff, 0x00001fff,
1091         0x9060, 0x0000007f, 0x00000020,
1092         0x9508, 0x00010000, 0x00010000,
1093         0xac14, 0x000003ff, 0x000000f3,
1094         0xac0c, 0xffffffff, 0x00001032
1095 };
1096
1097 static const u32 bonaire_mgcg_cgcg_init[] =
1098 {
1099         0xc420, 0xffffffff, 0xfffffffc,
1100         0x30800, 0xffffffff, 0xe0000000,
1101         0x3c2a0, 0xffffffff, 0x00000100,
1102         0x3c208, 0xffffffff, 0x00000100,
1103         0x3c2c0, 0xffffffff, 0xc0000100,
1104         0x3c2c8, 0xffffffff, 0xc0000100,
1105         0x3c2c4, 0xffffffff, 0xc0000100,
1106         0x55e4, 0xffffffff, 0x00600100,
1107         0x3c280, 0xffffffff, 0x00000100,
1108         0x3c214, 0xffffffff, 0x06000100,
1109         0x3c220, 0xffffffff, 0x00000100,
1110         0x3c218, 0xffffffff, 0x06000100,
1111         0x3c204, 0xffffffff, 0x00000100,
1112         0x3c2e0, 0xffffffff, 0x00000100,
1113         0x3c224, 0xffffffff, 0x00000100,
1114         0x3c200, 0xffffffff, 0x00000100,
1115         0x3c230, 0xffffffff, 0x00000100,
1116         0x3c234, 0xffffffff, 0x00000100,
1117         0x3c250, 0xffffffff, 0x00000100,
1118         0x3c254, 0xffffffff, 0x00000100,
1119         0x3c258, 0xffffffff, 0x00000100,
1120         0x3c25c, 0xffffffff, 0x00000100,
1121         0x3c260, 0xffffffff, 0x00000100,
1122         0x3c27c, 0xffffffff, 0x00000100,
1123         0x3c278, 0xffffffff, 0x00000100,
1124         0x3c210, 0xffffffff, 0x06000100,
1125         0x3c290, 0xffffffff, 0x00000100,
1126         0x3c274, 0xffffffff, 0x00000100,
1127         0x3c2b4, 0xffffffff, 0x00000100,
1128         0x3c2b0, 0xffffffff, 0x00000100,
1129         0x3c270, 0xffffffff, 0x00000100,
1130         0x30800, 0xffffffff, 0xe0000000,
1131         0x3c020, 0xffffffff, 0x00010000,
1132         0x3c024, 0xffffffff, 0x00030002,
1133         0x3c028, 0xffffffff, 0x00040007,
1134         0x3c02c, 0xffffffff, 0x00060005,
1135         0x3c030, 0xffffffff, 0x00090008,
1136         0x3c034, 0xffffffff, 0x00010000,
1137         0x3c038, 0xffffffff, 0x00030002,
1138         0x3c03c, 0xffffffff, 0x00040007,
1139         0x3c040, 0xffffffff, 0x00060005,
1140         0x3c044, 0xffffffff, 0x00090008,
1141         0x3c048, 0xffffffff, 0x00010000,
1142         0x3c04c, 0xffffffff, 0x00030002,
1143         0x3c050, 0xffffffff, 0x00040007,
1144         0x3c054, 0xffffffff, 0x00060005,
1145         0x3c058, 0xffffffff, 0x00090008,
1146         0x3c05c, 0xffffffff, 0x00010000,
1147         0x3c060, 0xffffffff, 0x00030002,
1148         0x3c064, 0xffffffff, 0x00040007,
1149         0x3c068, 0xffffffff, 0x00060005,
1150         0x3c06c, 0xffffffff, 0x00090008,
1151         0x3c070, 0xffffffff, 0x00010000,
1152         0x3c074, 0xffffffff, 0x00030002,
1153         0x3c078, 0xffffffff, 0x00040007,
1154         0x3c07c, 0xffffffff, 0x00060005,
1155         0x3c080, 0xffffffff, 0x00090008,
1156         0x3c084, 0xffffffff, 0x00010000,
1157         0x3c088, 0xffffffff, 0x00030002,
1158         0x3c08c, 0xffffffff, 0x00040007,
1159         0x3c090, 0xffffffff, 0x00060005,
1160         0x3c094, 0xffffffff, 0x00090008,
1161         0x3c098, 0xffffffff, 0x00010000,
1162         0x3c09c, 0xffffffff, 0x00030002,
1163         0x3c0a0, 0xffffffff, 0x00040007,
1164         0x3c0a4, 0xffffffff, 0x00060005,
1165         0x3c0a8, 0xffffffff, 0x00090008,
1166         0x3c000, 0xffffffff, 0x96e00200,
1167         0x8708, 0xffffffff, 0x00900100,
1168         0xc424, 0xffffffff, 0x0020003f,
1169         0x38, 0xffffffff, 0x0140001c,
1170         0x3c, 0x000f0000, 0x000f0000,
1171         0x220, 0xffffffff, 0xC060000C,
1172         0x224, 0xc0000fff, 0x00000100,
1173         0xf90, 0xffffffff, 0x00000100,
1174         0xf98, 0x00000101, 0x00000000,
1175         0x20a8, 0xffffffff, 0x00000104,
1176         0x55e4, 0xff000fff, 0x00000100,
1177         0x30cc, 0xc0000fff, 0x00000104,
1178         0xc1e4, 0x00000001, 0x00000001,
1179         0xd00c, 0xff000ff0, 0x00000100,
1180         0xd80c, 0xff000ff0, 0x00000100
1181 };
1182
1183 static const u32 spectre_golden_spm_registers[] =
1184 {
1185         0x30800, 0xe0ffffff, 0xe0000000
1186 };
1187
1188 static const u32 spectre_golden_common_registers[] =
1189 {
1190         0xc770, 0xffffffff, 0x00000800,
1191         0xc774, 0xffffffff, 0x00000800,
1192         0xc798, 0xffffffff, 0x00007fbf,
1193         0xc79c, 0xffffffff, 0x00007faf
1194 };
1195
1196 static const u32 spectre_golden_registers[] =
1197 {
1198         0x3c000, 0xffff1fff, 0x96940200,
1199         0x3c00c, 0xffff0001, 0xff000000,
1200         0x3c200, 0xfffc0fff, 0x00000100,
1201         0x6ed8, 0x00010101, 0x00010000,
1202         0x9834, 0xf00fffff, 0x00000400,
1203         0x9838, 0xfffffffc, 0x00020200,
1204         0x5bb0, 0x000000f0, 0x00000070,
1205         0x5bc0, 0xf0311fff, 0x80300000,
1206         0x98f8, 0x73773777, 0x12010001,
1207         0x9b7c, 0x00ff0000, 0x00fc0000,
1208         0x2f48, 0x73773777, 0x12010001,
1209         0x8a14, 0xf000003f, 0x00000007,
1210         0x8b24, 0xffffffff, 0x00ffffff,
1211         0x28350, 0x3f3f3fff, 0x00000082,
1212         0x28354, 0x0000003f, 0x00000000,
1213         0x3e78, 0x00000001, 0x00000002,
1214         0x913c, 0xffff03df, 0x00000004,
1215         0xc768, 0x00000008, 0x00000008,
1216         0x8c00, 0x000008ff, 0x00000800,
1217         0x9508, 0x00010000, 0x00010000,
1218         0xac0c, 0xffffffff, 0x54763210,
1219         0x214f8, 0x01ff01ff, 0x00000002,
1220         0x21498, 0x007ff800, 0x00200000,
1221         0x2015c, 0xffffffff, 0x00000f40,
1222         0x30934, 0xffffffff, 0x00000001
1223 };
1224
1225 static const u32 spectre_mgcg_cgcg_init[] =
1226 {
1227         0xc420, 0xffffffff, 0xfffffffc,
1228         0x30800, 0xffffffff, 0xe0000000,
1229         0x3c2a0, 0xffffffff, 0x00000100,
1230         0x3c208, 0xffffffff, 0x00000100,
1231         0x3c2c0, 0xffffffff, 0x00000100,
1232         0x3c2c8, 0xffffffff, 0x00000100,
1233         0x3c2c4, 0xffffffff, 0x00000100,
1234         0x55e4, 0xffffffff, 0x00600100,
1235         0x3c280, 0xffffffff, 0x00000100,
1236         0x3c214, 0xffffffff, 0x06000100,
1237         0x3c220, 0xffffffff, 0x00000100,
1238         0x3c218, 0xffffffff, 0x06000100,
1239         0x3c204, 0xffffffff, 0x00000100,
1240         0x3c2e0, 0xffffffff, 0x00000100,
1241         0x3c224, 0xffffffff, 0x00000100,
1242         0x3c200, 0xffffffff, 0x00000100,
1243         0x3c230, 0xffffffff, 0x00000100,
1244         0x3c234, 0xffffffff, 0x00000100,
1245         0x3c250, 0xffffffff, 0x00000100,
1246         0x3c254, 0xffffffff, 0x00000100,
1247         0x3c258, 0xffffffff, 0x00000100,
1248         0x3c25c, 0xffffffff, 0x00000100,
1249         0x3c260, 0xffffffff, 0x00000100,
1250         0x3c27c, 0xffffffff, 0x00000100,
1251         0x3c278, 0xffffffff, 0x00000100,
1252         0x3c210, 0xffffffff, 0x06000100,
1253         0x3c290, 0xffffffff, 0x00000100,
1254         0x3c274, 0xffffffff, 0x00000100,
1255         0x3c2b4, 0xffffffff, 0x00000100,
1256         0x3c2b0, 0xffffffff, 0x00000100,
1257         0x3c270, 0xffffffff, 0x00000100,
1258         0x30800, 0xffffffff, 0xe0000000,
1259         0x3c020, 0xffffffff, 0x00010000,
1260         0x3c024, 0xffffffff, 0x00030002,
1261         0x3c028, 0xffffffff, 0x00040007,
1262         0x3c02c, 0xffffffff, 0x00060005,
1263         0x3c030, 0xffffffff, 0x00090008,
1264         0x3c034, 0xffffffff, 0x00010000,
1265         0x3c038, 0xffffffff, 0x00030002,
1266         0x3c03c, 0xffffffff, 0x00040007,
1267         0x3c040, 0xffffffff, 0x00060005,
1268         0x3c044, 0xffffffff, 0x00090008,
1269         0x3c048, 0xffffffff, 0x00010000,
1270         0x3c04c, 0xffffffff, 0x00030002,
1271         0x3c050, 0xffffffff, 0x00040007,
1272         0x3c054, 0xffffffff, 0x00060005,
1273         0x3c058, 0xffffffff, 0x00090008,
1274         0x3c05c, 0xffffffff, 0x00010000,
1275         0x3c060, 0xffffffff, 0x00030002,
1276         0x3c064, 0xffffffff, 0x00040007,
1277         0x3c068, 0xffffffff, 0x00060005,
1278         0x3c06c, 0xffffffff, 0x00090008,
1279         0x3c070, 0xffffffff, 0x00010000,
1280         0x3c074, 0xffffffff, 0x00030002,
1281         0x3c078, 0xffffffff, 0x00040007,
1282         0x3c07c, 0xffffffff, 0x00060005,
1283         0x3c080, 0xffffffff, 0x00090008,
1284         0x3c084, 0xffffffff, 0x00010000,
1285         0x3c088, 0xffffffff, 0x00030002,
1286         0x3c08c, 0xffffffff, 0x00040007,
1287         0x3c090, 0xffffffff, 0x00060005,
1288         0x3c094, 0xffffffff, 0x00090008,
1289         0x3c098, 0xffffffff, 0x00010000,
1290         0x3c09c, 0xffffffff, 0x00030002,
1291         0x3c0a0, 0xffffffff, 0x00040007,
1292         0x3c0a4, 0xffffffff, 0x00060005,
1293         0x3c0a8, 0xffffffff, 0x00090008,
1294         0x3c0ac, 0xffffffff, 0x00010000,
1295         0x3c0b0, 0xffffffff, 0x00030002,
1296         0x3c0b4, 0xffffffff, 0x00040007,
1297         0x3c0b8, 0xffffffff, 0x00060005,
1298         0x3c0bc, 0xffffffff, 0x00090008,
1299         0x3c000, 0xffffffff, 0x96e00200,
1300         0x8708, 0xffffffff, 0x00900100,
1301         0xc424, 0xffffffff, 0x0020003f,
1302         0x38, 0xffffffff, 0x0140001c,
1303         0x3c, 0x000f0000, 0x000f0000,
1304         0x220, 0xffffffff, 0xC060000C,
1305         0x224, 0xc0000fff, 0x00000100,
1306         0xf90, 0xffffffff, 0x00000100,
1307         0xf98, 0x00000101, 0x00000000,
1308         0x20a8, 0xffffffff, 0x00000104,
1309         0x55e4, 0xff000fff, 0x00000100,
1310         0x30cc, 0xc0000fff, 0x00000104,
1311         0xc1e4, 0x00000001, 0x00000001,
1312         0xd00c, 0xff000ff0, 0x00000100,
1313         0xd80c, 0xff000ff0, 0x00000100
1314 };
1315
1316 static const u32 kalindi_golden_spm_registers[] =
1317 {
1318         0x30800, 0xe0ffffff, 0xe0000000
1319 };
1320
1321 static const u32 kalindi_golden_common_registers[] =
1322 {
1323         0xc770, 0xffffffff, 0x00000800,
1324         0xc774, 0xffffffff, 0x00000800,
1325         0xc798, 0xffffffff, 0x00007fbf,
1326         0xc79c, 0xffffffff, 0x00007faf
1327 };
1328
1329 static const u32 kalindi_golden_registers[] =
1330 {
1331         0x3c000, 0xffffdfff, 0x6e944040,
1332         0x55e4, 0xff607fff, 0xfc000100,
1333         0x3c220, 0xff000fff, 0x00000100,
1334         0x3c224, 0xff000fff, 0x00000100,
1335         0x3c200, 0xfffc0fff, 0x00000100,
1336         0x6ed8, 0x00010101, 0x00010000,
1337         0x9830, 0xffffffff, 0x00000000,
1338         0x9834, 0xf00fffff, 0x00000400,
1339         0x5bb0, 0x000000f0, 0x00000070,
1340         0x5bc0, 0xf0311fff, 0x80300000,
1341         0x98f8, 0x73773777, 0x12010001,
1342         0x98fc, 0xffffffff, 0x00000010,
1343         0x9b7c, 0x00ff0000, 0x00fc0000,
1344         0x8030, 0x00001f0f, 0x0000100a,
1345         0x2f48, 0x73773777, 0x12010001,
1346         0x2408, 0x000fffff, 0x000c007f,
1347         0x8a14, 0xf000003f, 0x00000007,
1348         0x8b24, 0x3fff3fff, 0x00ffcfff,
1349         0x30a04, 0x0000ff0f, 0x00000000,
1350         0x28a4c, 0x07ffffff, 0x06000000,
1351         0x4d8, 0x00000fff, 0x00000100,
1352         0x3e78, 0x00000001, 0x00000002,
1353         0xc768, 0x00000008, 0x00000008,
1354         0x8c00, 0x000000ff, 0x00000003,
1355         0x214f8, 0x01ff01ff, 0x00000002,
1356         0x21498, 0x007ff800, 0x00200000,
1357         0x2015c, 0xffffffff, 0x00000f40,
1358         0x88c4, 0x001f3ae3, 0x00000082,
1359         0x88d4, 0x0000001f, 0x00000010,
1360         0x30934, 0xffffffff, 0x00000000
1361 };
1362
1363 static const u32 kalindi_mgcg_cgcg_init[] =
1364 {
1365         0xc420, 0xffffffff, 0xfffffffc,
1366         0x30800, 0xffffffff, 0xe0000000,
1367         0x3c2a0, 0xffffffff, 0x00000100,
1368         0x3c208, 0xffffffff, 0x00000100,
1369         0x3c2c0, 0xffffffff, 0x00000100,
1370         0x3c2c8, 0xffffffff, 0x00000100,
1371         0x3c2c4, 0xffffffff, 0x00000100,
1372         0x55e4, 0xffffffff, 0x00600100,
1373         0x3c280, 0xffffffff, 0x00000100,
1374         0x3c214, 0xffffffff, 0x06000100,
1375         0x3c220, 0xffffffff, 0x00000100,
1376         0x3c218, 0xffffffff, 0x06000100,
1377         0x3c204, 0xffffffff, 0x00000100,
1378         0x3c2e0, 0xffffffff, 0x00000100,
1379         0x3c224, 0xffffffff, 0x00000100,
1380         0x3c200, 0xffffffff, 0x00000100,
1381         0x3c230, 0xffffffff, 0x00000100,
1382         0x3c234, 0xffffffff, 0x00000100,
1383         0x3c250, 0xffffffff, 0x00000100,
1384         0x3c254, 0xffffffff, 0x00000100,
1385         0x3c258, 0xffffffff, 0x00000100,
1386         0x3c25c, 0xffffffff, 0x00000100,
1387         0x3c260, 0xffffffff, 0x00000100,
1388         0x3c27c, 0xffffffff, 0x00000100,
1389         0x3c278, 0xffffffff, 0x00000100,
1390         0x3c210, 0xffffffff, 0x06000100,
1391         0x3c290, 0xffffffff, 0x00000100,
1392         0x3c274, 0xffffffff, 0x00000100,
1393         0x3c2b4, 0xffffffff, 0x00000100,
1394         0x3c2b0, 0xffffffff, 0x00000100,
1395         0x3c270, 0xffffffff, 0x00000100,
1396         0x30800, 0xffffffff, 0xe0000000,
1397         0x3c020, 0xffffffff, 0x00010000,
1398         0x3c024, 0xffffffff, 0x00030002,
1399         0x3c028, 0xffffffff, 0x00040007,
1400         0x3c02c, 0xffffffff, 0x00060005,
1401         0x3c030, 0xffffffff, 0x00090008,
1402         0x3c034, 0xffffffff, 0x00010000,
1403         0x3c038, 0xffffffff, 0x00030002,
1404         0x3c03c, 0xffffffff, 0x00040007,
1405         0x3c040, 0xffffffff, 0x00060005,
1406         0x3c044, 0xffffffff, 0x00090008,
1407         0x3c000, 0xffffffff, 0x96e00200,
1408         0x8708, 0xffffffff, 0x00900100,
1409         0xc424, 0xffffffff, 0x0020003f,
1410         0x38, 0xffffffff, 0x0140001c,
1411         0x3c, 0x000f0000, 0x000f0000,
1412         0x220, 0xffffffff, 0xC060000C,
1413         0x224, 0xc0000fff, 0x00000100,
1414         0x20a8, 0xffffffff, 0x00000104,
1415         0x55e4, 0xff000fff, 0x00000100,
1416         0x30cc, 0xc0000fff, 0x00000104,
1417         0xc1e4, 0x00000001, 0x00000001,
1418         0xd00c, 0xff000ff0, 0x00000100,
1419         0xd80c, 0xff000ff0, 0x00000100
1420 };
1421
1422 static const u32 hawaii_golden_spm_registers[] =
1423 {
1424         0x30800, 0xe0ffffff, 0xe0000000
1425 };
1426
1427 static const u32 hawaii_golden_common_registers[] =
1428 {
1429         0x30800, 0xffffffff, 0xe0000000,
1430         0x28350, 0xffffffff, 0x3a00161a,
1431         0x28354, 0xffffffff, 0x0000002e,
1432         0x9a10, 0xffffffff, 0x00018208,
1433         0x98f8, 0xffffffff, 0x12011003
1434 };
1435
1436 static const u32 hawaii_golden_registers[] =
1437 {
1438         0x3354, 0x00000333, 0x00000333,
1439         0x9a10, 0x00010000, 0x00058208,
1440         0x9830, 0xffffffff, 0x00000000,
1441         0x9834, 0xf00fffff, 0x00000400,
1442         0x9838, 0x0002021c, 0x00020200,
1443         0xc78, 0x00000080, 0x00000000,
1444         0x5bb0, 0x000000f0, 0x00000070,
1445         0x5bc0, 0xf0311fff, 0x80300000,
1446         0x350c, 0x00810000, 0x408af000,
1447         0x7030, 0x31000111, 0x00000011,
1448         0x2f48, 0x73773777, 0x12010001,
1449         0x2120, 0x0000007f, 0x0000001b,
1450         0x21dc, 0x00007fb6, 0x00002191,
1451         0x3628, 0x0000003f, 0x0000000a,
1452         0x362c, 0x0000003f, 0x0000000a,
1453         0x2ae4, 0x00073ffe, 0x000022a2,
1454         0x240c, 0x000007ff, 0x00000000,
1455         0x8bf0, 0x00002001, 0x00000001,
1456         0x8b24, 0xffffffff, 0x00ffffff,
1457         0x30a04, 0x0000ff0f, 0x00000000,
1458         0x28a4c, 0x07ffffff, 0x06000000,
1459         0x3e78, 0x00000001, 0x00000002,
1460         0xc768, 0x00000008, 0x00000008,
1461         0xc770, 0x00000f00, 0x00000800,
1462         0xc774, 0x00000f00, 0x00000800,
1463         0xc798, 0x00ffffff, 0x00ff7fbf,
1464         0xc79c, 0x00ffffff, 0x00ff7faf,
1465         0x8c00, 0x000000ff, 0x00000800,
1466         0xe40, 0x00001fff, 0x00001fff,
1467         0x9060, 0x0000007f, 0x00000020,
1468         0x9508, 0x00010000, 0x00010000,
1469         0xae00, 0x00100000, 0x000ff07c,
1470         0xac14, 0x000003ff, 0x0000000f,
1471         0xac10, 0xffffffff, 0x7564fdec,
1472         0xac0c, 0xffffffff, 0x3120b9a8,
1473         0xac08, 0x20000000, 0x0f9c0000
1474 };
1475
1476 static const u32 hawaii_mgcg_cgcg_init[] =
1477 {
1478         0xc420, 0xffffffff, 0xfffffffd,
1479         0x30800, 0xffffffff, 0xe0000000,
1480         0x3c2a0, 0xffffffff, 0x00000100,
1481         0x3c208, 0xffffffff, 0x00000100,
1482         0x3c2c0, 0xffffffff, 0x00000100,
1483         0x3c2c8, 0xffffffff, 0x00000100,
1484         0x3c2c4, 0xffffffff, 0x00000100,
1485         0x55e4, 0xffffffff, 0x00200100,
1486         0x3c280, 0xffffffff, 0x00000100,
1487         0x3c214, 0xffffffff, 0x06000100,
1488         0x3c220, 0xffffffff, 0x00000100,
1489         0x3c218, 0xffffffff, 0x06000100,
1490         0x3c204, 0xffffffff, 0x00000100,
1491         0x3c2e0, 0xffffffff, 0x00000100,
1492         0x3c224, 0xffffffff, 0x00000100,
1493         0x3c200, 0xffffffff, 0x00000100,
1494         0x3c230, 0xffffffff, 0x00000100,
1495         0x3c234, 0xffffffff, 0x00000100,
1496         0x3c250, 0xffffffff, 0x00000100,
1497         0x3c254, 0xffffffff, 0x00000100,
1498         0x3c258, 0xffffffff, 0x00000100,
1499         0x3c25c, 0xffffffff, 0x00000100,
1500         0x3c260, 0xffffffff, 0x00000100,
1501         0x3c27c, 0xffffffff, 0x00000100,
1502         0x3c278, 0xffffffff, 0x00000100,
1503         0x3c210, 0xffffffff, 0x06000100,
1504         0x3c290, 0xffffffff, 0x00000100,
1505         0x3c274, 0xffffffff, 0x00000100,
1506         0x3c2b4, 0xffffffff, 0x00000100,
1507         0x3c2b0, 0xffffffff, 0x00000100,
1508         0x3c270, 0xffffffff, 0x00000100,
1509         0x30800, 0xffffffff, 0xe0000000,
1510         0x3c020, 0xffffffff, 0x00010000,
1511         0x3c024, 0xffffffff, 0x00030002,
1512         0x3c028, 0xffffffff, 0x00040007,
1513         0x3c02c, 0xffffffff, 0x00060005,
1514         0x3c030, 0xffffffff, 0x00090008,
1515         0x3c034, 0xffffffff, 0x00010000,
1516         0x3c038, 0xffffffff, 0x00030002,
1517         0x3c03c, 0xffffffff, 0x00040007,
1518         0x3c040, 0xffffffff, 0x00060005,
1519         0x3c044, 0xffffffff, 0x00090008,
1520         0x3c048, 0xffffffff, 0x00010000,
1521         0x3c04c, 0xffffffff, 0x00030002,
1522         0x3c050, 0xffffffff, 0x00040007,
1523         0x3c054, 0xffffffff, 0x00060005,
1524         0x3c058, 0xffffffff, 0x00090008,
1525         0x3c05c, 0xffffffff, 0x00010000,
1526         0x3c060, 0xffffffff, 0x00030002,
1527         0x3c064, 0xffffffff, 0x00040007,
1528         0x3c068, 0xffffffff, 0x00060005,
1529         0x3c06c, 0xffffffff, 0x00090008,
1530         0x3c070, 0xffffffff, 0x00010000,
1531         0x3c074, 0xffffffff, 0x00030002,
1532         0x3c078, 0xffffffff, 0x00040007,
1533         0x3c07c, 0xffffffff, 0x00060005,
1534         0x3c080, 0xffffffff, 0x00090008,
1535         0x3c084, 0xffffffff, 0x00010000,
1536         0x3c088, 0xffffffff, 0x00030002,
1537         0x3c08c, 0xffffffff, 0x00040007,
1538         0x3c090, 0xffffffff, 0x00060005,
1539         0x3c094, 0xffffffff, 0x00090008,
1540         0x3c098, 0xffffffff, 0x00010000,
1541         0x3c09c, 0xffffffff, 0x00030002,
1542         0x3c0a0, 0xffffffff, 0x00040007,
1543         0x3c0a4, 0xffffffff, 0x00060005,
1544         0x3c0a8, 0xffffffff, 0x00090008,
1545         0x3c0ac, 0xffffffff, 0x00010000,
1546         0x3c0b0, 0xffffffff, 0x00030002,
1547         0x3c0b4, 0xffffffff, 0x00040007,
1548         0x3c0b8, 0xffffffff, 0x00060005,
1549         0x3c0bc, 0xffffffff, 0x00090008,
1550         0x3c0c0, 0xffffffff, 0x00010000,
1551         0x3c0c4, 0xffffffff, 0x00030002,
1552         0x3c0c8, 0xffffffff, 0x00040007,
1553         0x3c0cc, 0xffffffff, 0x00060005,
1554         0x3c0d0, 0xffffffff, 0x00090008,
1555         0x3c0d4, 0xffffffff, 0x00010000,
1556         0x3c0d8, 0xffffffff, 0x00030002,
1557         0x3c0dc, 0xffffffff, 0x00040007,
1558         0x3c0e0, 0xffffffff, 0x00060005,
1559         0x3c0e4, 0xffffffff, 0x00090008,
1560         0x3c0e8, 0xffffffff, 0x00010000,
1561         0x3c0ec, 0xffffffff, 0x00030002,
1562         0x3c0f0, 0xffffffff, 0x00040007,
1563         0x3c0f4, 0xffffffff, 0x00060005,
1564         0x3c0f8, 0xffffffff, 0x00090008,
1565         0xc318, 0xffffffff, 0x00020200,
1566         0x3350, 0xffffffff, 0x00000200,
1567         0x15c0, 0xffffffff, 0x00000400,
1568         0x55e8, 0xffffffff, 0x00000000,
1569         0x2f50, 0xffffffff, 0x00000902,
1570         0x3c000, 0xffffffff, 0x96940200,
1571         0x8708, 0xffffffff, 0x00900100,
1572         0xc424, 0xffffffff, 0x0020003f,
1573         0x38, 0xffffffff, 0x0140001c,
1574         0x3c, 0x000f0000, 0x000f0000,
1575         0x220, 0xffffffff, 0xc060000c,
1576         0x224, 0xc0000fff, 0x00000100,
1577         0xf90, 0xffffffff, 0x00000100,
1578         0xf98, 0x00000101, 0x00000000,
1579         0x20a8, 0xffffffff, 0x00000104,
1580         0x55e4, 0xff000fff, 0x00000100,
1581         0x30cc, 0xc0000fff, 0x00000104,
1582         0xc1e4, 0x00000001, 0x00000001,
1583         0xd00c, 0xff000ff0, 0x00000100,
1584         0xd80c, 0xff000ff0, 0x00000100
1585 };
1586
1587 static const u32 godavari_golden_registers[] =
1588 {
1589         0x55e4, 0xff607fff, 0xfc000100,
1590         0x6ed8, 0x00010101, 0x00010000,
1591         0x9830, 0xffffffff, 0x00000000,
1592         0x98302, 0xf00fffff, 0x00000400,
1593         0x6130, 0xffffffff, 0x00010000,
1594         0x5bb0, 0x000000f0, 0x00000070,
1595         0x5bc0, 0xf0311fff, 0x80300000,
1596         0x98f8, 0x73773777, 0x12010001,
1597         0x98fc, 0xffffffff, 0x00000010,
1598         0x8030, 0x00001f0f, 0x0000100a,
1599         0x2f48, 0x73773777, 0x12010001,
1600         0x2408, 0x000fffff, 0x000c007f,
1601         0x8a14, 0xf000003f, 0x00000007,
1602         0x8b24, 0xffffffff, 0x00ff0fff,
1603         0x30a04, 0x0000ff0f, 0x00000000,
1604         0x28a4c, 0x07ffffff, 0x06000000,
1605         0x4d8, 0x00000fff, 0x00000100,
1606         0xd014, 0x00010000, 0x00810001,
1607         0xd814, 0x00010000, 0x00810001,
1608         0x3e78, 0x00000001, 0x00000002,
1609         0xc768, 0x00000008, 0x00000008,
1610         0xc770, 0x00000f00, 0x00000800,
1611         0xc774, 0x00000f00, 0x00000800,
1612         0xc798, 0x00ffffff, 0x00ff7fbf,
1613         0xc79c, 0x00ffffff, 0x00ff7faf,
1614         0x8c00, 0x000000ff, 0x00000001,
1615         0x214f8, 0x01ff01ff, 0x00000002,
1616         0x21498, 0x007ff800, 0x00200000,
1617         0x2015c, 0xffffffff, 0x00000f40,
1618         0x88c4, 0x001f3ae3, 0x00000082,
1619         0x88d4, 0x0000001f, 0x00000010,
1620         0x30934, 0xffffffff, 0x00000000
1621 };
1622
1623
1624 static void cik_init_golden_registers(struct radeon_device *rdev)
1625 {
1626         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1627         mutex_lock(&rdev->grbm_idx_mutex);
1628         switch (rdev->family) {
1629         case CHIP_BONAIRE:
1630                 radeon_program_register_sequence(rdev,
1631                                                  bonaire_mgcg_cgcg_init,
1632                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1633                 radeon_program_register_sequence(rdev,
1634                                                  bonaire_golden_registers,
1635                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  bonaire_golden_common_registers,
1638                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1639                 radeon_program_register_sequence(rdev,
1640                                                  bonaire_golden_spm_registers,
1641                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1642                 break;
1643         case CHIP_KABINI:
1644                 radeon_program_register_sequence(rdev,
1645                                                  kalindi_mgcg_cgcg_init,
1646                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1647                 radeon_program_register_sequence(rdev,
1648                                                  kalindi_golden_registers,
1649                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1650                 radeon_program_register_sequence(rdev,
1651                                                  kalindi_golden_common_registers,
1652                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1653                 radeon_program_register_sequence(rdev,
1654                                                  kalindi_golden_spm_registers,
1655                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1656                 break;
1657         case CHIP_MULLINS:
1658                 radeon_program_register_sequence(rdev,
1659                                                  kalindi_mgcg_cgcg_init,
1660                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1661                 radeon_program_register_sequence(rdev,
1662                                                  godavari_golden_registers,
1663                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1664                 radeon_program_register_sequence(rdev,
1665                                                  kalindi_golden_common_registers,
1666                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1667                 radeon_program_register_sequence(rdev,
1668                                                  kalindi_golden_spm_registers,
1669                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1670                 break;
1671         case CHIP_KAVERI:
1672                 radeon_program_register_sequence(rdev,
1673                                                  spectre_mgcg_cgcg_init,
1674                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1675                 radeon_program_register_sequence(rdev,
1676                                                  spectre_golden_registers,
1677                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1678                 radeon_program_register_sequence(rdev,
1679                                                  spectre_golden_common_registers,
1680                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1681                 radeon_program_register_sequence(rdev,
1682                                                  spectre_golden_spm_registers,
1683                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1684                 break;
1685         case CHIP_HAWAII:
1686                 radeon_program_register_sequence(rdev,
1687                                                  hawaii_mgcg_cgcg_init,
1688                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1689                 radeon_program_register_sequence(rdev,
1690                                                  hawaii_golden_registers,
1691                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1692                 radeon_program_register_sequence(rdev,
1693                                                  hawaii_golden_common_registers,
1694                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1695                 radeon_program_register_sequence(rdev,
1696                                                  hawaii_golden_spm_registers,
1697                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1698                 break;
1699         default:
1700                 break;
1701         }
1702         mutex_unlock(&rdev->grbm_idx_mutex);
1703 }
1704
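/*
 * Illustrative sketch (editor's note, not part of the upstream driver):
 * each golden register table above is a flat list of
 * (register offset, and_mask, or_value) triples.  A sequence of that shape
 * is applied with one read-modify-write per entry, roughly as in the
 * hypothetical helper below; the driver itself uses
 * radeon_program_register_sequence(), whose exact semantics may differ.
 */
#if 0	/* example only */
static void example_program_golden_triples(struct radeon_device *rdev,
					   const u32 *regs, u32 array_size)
{
	u32 i, reg, and_mask, or_value, tmp;

	for (i = 0; i + 2 < array_size; i += 3) {
		reg = regs[i + 0];
		and_mask = regs[i + 1];
		or_value = regs[i + 2];

		if (and_mask == 0xffffffff) {
			/* full-width mask: overwrite the register outright */
			tmp = or_value;
		} else {
			/* clear the masked bits, then OR in the new value */
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= (or_value & and_mask);
		}
		WREG32(reg, tmp);
	}
}
#endif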
1705 /**
1706  * cik_get_xclk - get the xclk
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Returns the reference clock used by the gfx engine
1711  * (CIK).
1712  */
1713 u32 cik_get_xclk(struct radeon_device *rdev)
1714 {
1715         u32 reference_clock = rdev->clock.spll.reference_freq;
1716
1717         if (rdev->flags & RADEON_IS_IGP) {
1718                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719                         return reference_clock / 2;
1720         } else {
1721                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722                         return reference_clock / 4;
1723         }
1724         return reference_clock;
1725 }
1726
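/*
 * Illustrative sketch (editor's note, not part of the driver): callers just
 * consume the divided-down value returned above.  The conversion below
 * assumes the usual radeon convention that the reference clock is reported
 * in units of 10 kHz; treat that as an assumption, not driver behavior.
 */
#if 0	/* example only */
	u32 xclk = cik_get_xclk(rdev);		/* already /2 or /4 when needed */
	u32 ticks_per_usec = xclk / 100;	/* assuming 10 kHz units */
#endif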
1727 /**
1728  * cik_mm_rdoorbell - read a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  *
1733  * Returns the value in the doorbell aperture at the
1734  * requested doorbell index (CIK).
1735  */
1736 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737 {
1738         if (index < rdev->doorbell.num_doorbells) {
1739                 return readl(rdev->doorbell.ptr + index);
1740         } else {
1741                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742                 return 0;
1743         }
1744 }
1745
1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758         if (index < rdev->doorbell.num_doorbells) {
1759                 writel(v, rdev->doorbell.ptr + index);
1760         } else {
1761                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762         }
1763 }
1764
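/*
 * Illustrative sketch (editor's note, not part of the driver): these
 * accessors back the RDOORBELL32()/WDOORBELL32() style helpers.  A compute
 * ring can kick the hardware by writing its write pointer to its assigned
 * doorbell slot, roughly as below (the ring field names are assumed here).
 */
#if 0	/* example only */
static void example_ring_commit_via_doorbell(struct radeon_device *rdev,
					     struct radeon_ring *ring)
{
	/* make the new ring contents visible before ringing the doorbell */
	wmb();
	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
}
#endif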
1765 #define BONAIRE_IO_MC_REGS_SIZE 36
1766
1767 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1768 {
1769         {0x00000070, 0x04400000},
1770         {0x00000071, 0x80c01803},
1771         {0x00000072, 0x00004004},
1772         {0x00000073, 0x00000100},
1773         {0x00000074, 0x00ff0000},
1774         {0x00000075, 0x34000000},
1775         {0x00000076, 0x08000014},
1776         {0x00000077, 0x00cc08ec},
1777         {0x00000078, 0x00000400},
1778         {0x00000079, 0x00000000},
1779         {0x0000007a, 0x04090000},
1780         {0x0000007c, 0x00000000},
1781         {0x0000007e, 0x4408a8e8},
1782         {0x0000007f, 0x00000304},
1783         {0x00000080, 0x00000000},
1784         {0x00000082, 0x00000001},
1785         {0x00000083, 0x00000002},
1786         {0x00000084, 0xf3e4f400},
1787         {0x00000085, 0x052024e3},
1788         {0x00000087, 0x00000000},
1789         {0x00000088, 0x01000000},
1790         {0x0000008a, 0x1c0a0000},
1791         {0x0000008b, 0xff010000},
1792         {0x0000008d, 0xffffefff},
1793         {0x0000008e, 0xfff3efff},
1794         {0x0000008f, 0xfff3efbf},
1795         {0x00000092, 0xf7ffffff},
1796         {0x00000093, 0xffffff7f},
1797         {0x00000095, 0x00101101},
1798         {0x00000096, 0x00000fff},
1799         {0x00000097, 0x00116fff},
1800         {0x00000098, 0x60010000},
1801         {0x00000099, 0x10010000},
1802         {0x0000009a, 0x00006000},
1803         {0x0000009b, 0x00001000},
1804         {0x0000009f, 0x00b48000}
1805 };
1806
1807 #define HAWAII_IO_MC_REGS_SIZE 22
1808
1809 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1810 {
1811         {0x0000007d, 0x40000000},
1812         {0x0000007e, 0x40180304},
1813         {0x0000007f, 0x0000ff00},
1814         {0x00000081, 0x00000000},
1815         {0x00000083, 0x00000800},
1816         {0x00000086, 0x00000000},
1817         {0x00000087, 0x00000100},
1818         {0x00000088, 0x00020100},
1819         {0x00000089, 0x00000000},
1820         {0x0000008b, 0x00040000},
1821         {0x0000008c, 0x00000100},
1822         {0x0000008e, 0xff010000},
1823         {0x00000090, 0xffffefff},
1824         {0x00000091, 0xfff3efff},
1825         {0x00000092, 0xfff3efbf},
1826         {0x00000093, 0xf7ffffff},
1827         {0x00000094, 0xffffff7f},
1828         {0x00000095, 0x00000fff},
1829         {0x00000096, 0x00116fff},
1830         {0x00000097, 0x60010000},
1831         {0x00000098, 0x10010000},
1832         {0x0000009f, 0x00c79000}
1833 };
1834
1835
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850                             u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853                              MEID(me & 0x3) |
1854                              VMID(vmid & 0xf) |
1855                              QUEUEID(queue & 0x7));
1856         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
1858
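/*
 * Illustrative sketch (editor's note, not part of the driver): SRBM
 * selection is normally bracketed by a lock, the per-instance register
 * programming, and a reset back to instance 0, roughly as below
 * (rdev->srbm_mutex is assumed to be the guarding lock, as used elsewhere
 * in this file).
 */
#if 0	/* example only */
	mutex_lock(&rdev->srbm_mutex);
	cik_srbm_select(rdev, me, pipe, queue, vmid);

	/* ... program registers instanced per me/pipe/queue or per VMID ... */

	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
#endif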
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870         const __be32 *fw_data = NULL;
1871         const __le32 *new_fw_data = NULL;
1872         u32 running, blackout = 0, tmp;
1873         u32 *io_mc_regs = NULL;
1874         const __le32 *new_io_mc_regs = NULL;
1875         int i, regs_size, ucode_size;
1876
1877         if (!rdev->mc_fw)
1878                 return -EINVAL;
1879
1880         if (rdev->new_fw) {
1881                 const struct mc_firmware_header_v1_0 *hdr =
1882                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883
1884                 radeon_ucode_print_mc_hdr(&hdr->header);
1885
1886                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887                 new_io_mc_regs = (const __le32 *)
1888                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890                 new_fw_data = (const __le32 *)
1891                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892         } else {
1893                 ucode_size = rdev->mc_fw->size / 4;
1894
1895                 switch (rdev->family) {
1896                 case CHIP_BONAIRE:
1897                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899                         break;
1900                 case CHIP_HAWAII:
1901                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1903                         break;
1904                 default:
1905                         return -EINVAL;
1906                 }
1907                 fw_data = (const __be32 *)rdev->mc_fw->data;
1908         }
1909
1910         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911
1912         if (running == 0) {
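                /*
                 * Note: 'running' is zero inside this branch, so the
                 * blackout save below and the matching restore at the end
                 * of the block can never execute.
                 */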
1913                 if (running) {
1914                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916                 }
1917
1918                 /* reset the engine and set to writable */
1919                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922                 /* load mc io regs */
1923                 for (i = 0; i < regs_size; i++) {
1924                         if (rdev->new_fw) {
1925                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927                         } else {
1928                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930                         }
1931                 }
1932
1933                 tmp = RREG32(MC_SEQ_MISC0);
1934                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939                 }
1940
1941                 /* load the MC ucode */
1942                 for (i = 0; i < ucode_size; i++) {
1943                         if (rdev->new_fw)
1944                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945                         else
1946                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947                 }
1948
1949                 /* put the engine back into the active state */
1950                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954                 /* wait for training to complete */
1955                 for (i = 0; i < rdev->usec_timeout; i++) {
1956                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957                                 break;
1958                         udelay(1);
1959                 }
1960                 for (i = 0; i < rdev->usec_timeout; i++) {
1961                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962                                 break;
1963                         udelay(1);
1964                 }
1965
1966                 if (running)
1967                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968         }
1969
1970         return 0;
1971 }
1972
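/*
 * Illustrative sketch (editor's note, not part of the driver): on discrete
 * boards the MC microcode is pushed to the hardware early in the startup
 * path, before the memory controller is programmed, roughly as below.
 */
#if 0	/* example only */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}
#endif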
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984         const char *chip_name;
1985         const char *new_chip_name;
1986         size_t pfp_req_size, me_req_size, ce_req_size,
1987                 mec_req_size, rlc_req_size, mc_req_size = 0,
1988                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989         char fw_name[30];
1990         int new_fw = 0;
1991         int err;
1992         int num_fw;
1993
1994         DRM_DEBUG("\n");
1995
1996         switch (rdev->family) {
1997         case CHIP_BONAIRE:
1998                 chip_name = "BONAIRE";
1999                 new_chip_name = "bonaire";
2000                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2002                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009                 num_fw = 8;
2010                 break;
2011         case CHIP_HAWAII:
2012                 chip_name = "HAWAII";
2013                 new_chip_name = "hawaii";
2014                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023                 num_fw = 8;
2024                 break;
2025         case CHIP_KAVERI:
2026                 chip_name = "KAVERI";
2027                 new_chip_name = "kaveri";
2028                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2030                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034                 num_fw = 7;
2035                 break;
2036         case CHIP_KABINI:
2037                 chip_name = "KABINI";
2038                 new_chip_name = "kabini";
2039                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2041                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045                 num_fw = 6;
2046                 break;
2047         case CHIP_MULLINS:
2048                 chip_name = "MULLINS";
2049                 new_chip_name = "mullins";
2050                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2052                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056                 num_fw = 6;
2057                 break;
2058         default: BUG();
2059         }
2060
2061         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065         if (err) {
2066                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068                 if (err)
2069                         goto out;
2070                 if (rdev->pfp_fw->size != pfp_req_size) {
2071                         printk(KERN_ERR
2072                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073                                rdev->pfp_fw->size, fw_name);
2074                         err = -EINVAL;
2075                         goto out;
2076                 }
2077         } else {
2078                 err = radeon_ucode_validate(rdev->pfp_fw);
2079                 if (err) {
2080                         printk(KERN_ERR
2081                                "cik_fw: validation failed for firmware \"%s\"\n",
2082                                fw_name);
2083                         goto out;
2084                 } else {
2085                         new_fw++;
2086                 }
2087         }
2088
2089         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091         if (err) {
2092                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094                 if (err)
2095                         goto out;
2096                 if (rdev->me_fw->size != me_req_size) {
2097                         printk(KERN_ERR
2098                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->me_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->me_fw);
2104                 if (err) {
2105                         printk(KERN_ERR
2106                                "cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->ce_fw->size != ce_req_size) {
2122                         printk(KERN_ERR
2123                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124                                rdev->ce_fw->size, fw_name);
2125                         err = -EINVAL;
2126                 }
2127         } else {
2128                 err = radeon_ucode_validate(rdev->ce_fw);
2129                 if (err) {
2130                         printk(KERN_ERR
2131                                "cik_fw: validation failed for firmware \"%s\"\n",
2132                                fw_name);
2133                         goto out;
2134                 } else {
2135                         new_fw++;
2136                 }
2137         }
2138
2139         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141         if (err) {
2142                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144                 if (err)
2145                         goto out;
2146                 if (rdev->mec_fw->size != mec_req_size) {
2147                         printk(KERN_ERR
2148                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149                                rdev->mec_fw->size, fw_name);
2150                         err = -EINVAL;
2151                 }
2152         } else {
2153                 err = radeon_ucode_validate(rdev->mec_fw);
2154                 if (err) {
2155                         printk(KERN_ERR
2156                                "cik_fw: validation failed for firmware \"%s\"\n",
2157                                fw_name);
2158                         goto out;
2159                 } else {
2160                         new_fw++;
2161                 }
2162         }
2163
2164         if (rdev->family == CHIP_KAVERI) {
2165                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167                 if (err) {
2168                         goto out;
2169                 } else {
2170                         err = radeon_ucode_validate(rdev->mec2_fw);
2171                         if (err) {
2172                                 goto out;
2173                         } else {
2174                                 new_fw++;
2175                         }
2176                 }
2177         }
2178
2179         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181         if (err) {
2182                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184                 if (err)
2185                         goto out;
2186                 if (rdev->rlc_fw->size != rlc_req_size) {
2187                         printk(KERN_ERR
2188                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189                                rdev->rlc_fw->size, fw_name);
2190                         err = -EINVAL;
2191                 }
2192         } else {
2193                 err = radeon_ucode_validate(rdev->rlc_fw);
2194                 if (err) {
2195                         printk(KERN_ERR
2196                                "cik_fw: validation failed for firmware \"%s\"\n",
2197                                fw_name);
2198                         goto out;
2199                 } else {
2200                         new_fw++;
2201                 }
2202         }
2203
2204         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206         if (err) {
2207                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209                 if (err)
2210                         goto out;
2211                 if (rdev->sdma_fw->size != sdma_req_size) {
2212                         printk(KERN_ERR
2213                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214                                rdev->sdma_fw->size, fw_name);
2215                         err = -EINVAL;
2216                 }
2217         } else {
2218                 err = radeon_ucode_validate(rdev->sdma_fw);
2219                 if (err) {
2220                         printk(KERN_ERR
2221                                "cik_fw: validation failed for firmware \"%s\"\n",
2222                                fw_name);
2223                         goto out;
2224                 } else {
2225                         new_fw++;
2226                 }
2227         }
2228
2229         /* No SMC, MC ucode on APUs */
2230         if (!(rdev->flags & RADEON_IS_IGP)) {
2231                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233                 if (err) {
2234                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236                         if (err) {
2237                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239                                 if (err)
2240                                         goto out;
2241                         }
2242                         if ((rdev->mc_fw->size != mc_req_size) &&
2243                             (rdev->mc_fw->size != mc2_req_size)){
2244                                 printk(KERN_ERR
2245                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246                                        rdev->mc_fw->size, fw_name);
2247                                 err = -EINVAL;
2248                         }
2249                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250                 } else {
2251                         err = radeon_ucode_validate(rdev->mc_fw);
2252                         if (err) {
2253                                 printk(KERN_ERR
2254                                        "cik_fw: validation failed for firmware \"%s\"\n",
2255                                        fw_name);
2256                                 goto out;
2257                         } else {
2258                                 new_fw++;
2259                         }
2260                 }
2261
2262                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264                 if (err) {
2265                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267                         if (err) {
2268                                 printk(KERN_ERR
2269                                        "smc: error loading firmware \"%s\"\n",
2270                                        fw_name);
2271                                 release_firmware(rdev->smc_fw);
2272                                 rdev->smc_fw = NULL;
2273                                 err = 0;
2274                         } else if (rdev->smc_fw->size != smc_req_size) {
2275                                 printk(KERN_ERR
2276                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277                                        rdev->smc_fw->size, fw_name);
2278                                 err = -EINVAL;
2279                         }
2280                 } else {
2281                         err = radeon_ucode_validate(rdev->smc_fw);
2282                         if (err) {
2283                                 printk(KERN_ERR
2284                                        "cik_fw: validation failed for firmware \"%s\"\n",
2285                                        fw_name);
2286                                 goto out;
2287                         } else {
2288                                 new_fw++;
2289                         }
2290                 }
2291         }
2292
2293         if (new_fw == 0) {
2294                 rdev->new_fw = false;
2295         } else if (new_fw < num_fw) {
2296                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297                 err = -EINVAL;
2298         } else {
2299                 rdev->new_fw = true;
2300         }
2301
2302 out:
2303         if (err) {
2304                 if (err != -EINVAL)
2305                         printk(KERN_ERR
2306                                "cik_cp: Failed to load firmware \"%s\"\n",
2307                                fw_name);
2308                 release_firmware(rdev->pfp_fw);
2309                 rdev->pfp_fw = NULL;
2310                 release_firmware(rdev->me_fw);
2311                 rdev->me_fw = NULL;
2312                 release_firmware(rdev->ce_fw);
2313                 rdev->ce_fw = NULL;
2314                 release_firmware(rdev->mec_fw);
2315                 rdev->mec_fw = NULL;
2316                 release_firmware(rdev->mec2_fw);
2317                 rdev->mec2_fw = NULL;
2318                 release_firmware(rdev->rlc_fw);
2319                 rdev->rlc_fw = NULL;
2320                 release_firmware(rdev->sdma_fw);
2321                 rdev->sdma_fw = NULL;
2322                 release_firmware(rdev->mc_fw);
2323                 rdev->mc_fw = NULL;
2324                 release_firmware(rdev->smc_fw);
2325                 rdev->smc_fw = NULL;
2326         }
2327         return err;
2328 }
2329
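/*
 * Illustrative sketch (editor's note, not part of the driver): every image
 * requested above follows the same pattern - try the new lower-case
 * firmware name first and validate it, otherwise fall back to the legacy
 * upper-case name and check its expected size.  A hypothetical helper
 * capturing that flow (name and signature are invented for illustration):
 */
#if 0	/* example only */
static int example_request_cik_fw(struct radeon_device *rdev,
				  const struct firmware **fw,
				  const char *new_name, const char *old_name,
				  const char *suffix, size_t legacy_size)
{
	char fw_name[30];
	int err;

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", new_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (!err)
		return radeon_ucode_validate(*fw);	/* new-format image */

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", old_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (err)
		return err;
	if ((*fw)->size != legacy_size)
		return -EINVAL;		/* legacy image with a bogus length */
	return 0;
}
#endif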
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346         const u32 num_tile_mode_states = 32;
2347         const u32 num_secondary_tile_mode_states = 16;
2348         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349         u32 num_pipe_configs;
2350         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351                 rdev->config.cik.max_shader_engines;
2352
2353         switch (rdev->config.cik.mem_row_size_in_kb) {
2354         case 1:
2355                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356                 break;
2357         case 2:
2358         default:
2359                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360                 break;
2361         case 4:
2362                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363                 break;
2364         }
2365
2366         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367         if (num_pipe_configs > 8)
2368                 num_pipe_configs = 16;
2369
2370         if (num_pipe_configs == 16) {
2371                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372                         switch (reg_offset) {
2373                         case 0:
2374                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378                                 break;
2379                         case 1:
2380                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384                                 break;
2385                         case 2:
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390                                 break;
2391                         case 3:
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396                                 break;
2397                         case 4:
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                                  TILE_SPLIT(split_equal_to_row_size));
2402                                 break;
2403                         case 5:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                                 break;
2408                         case 6:
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413                                 break;
2414                         case 7:
2415                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                                  TILE_SPLIT(split_equal_to_row_size));
2419                                 break;
2420                         case 8:
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423                                 break;
2424                         case 9:
2425                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428                                 break;
2429                         case 10:
2430                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                                 break;
2435                         case 11:
2436                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                                 break;
2441                         case 12:
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446                                 break;
2447                         case 13:
2448                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451                                 break;
2452                         case 14:
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457                                 break;
2458                         case 16:
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                                 break;
2464                         case 17:
2465                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469                                 break;
2470                         case 27:
2471                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474                                 break;
2475                         case 28:
2476                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480                                 break;
2481                         case 29:
2482                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486                                 break;
2487                         case 30:
2488                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                                 break;
2493                         default:
2494                                 gb_tile_moden = 0;
2495                                 break;
2496                         }
2497                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499                 }
2500                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501                         switch (reg_offset) {
2502                         case 0:
2503                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2507                                 break;
2508                         case 1:
2509                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2513                                 break;
2514                         case 2:
2515                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2519                                 break;
2520                         case 3:
2521                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2525                                 break;
2526                         case 4:
2527                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2531                                 break;
2532                         case 5:
2533                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2537                                 break;
2538                         case 6:
2539                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2543                                 break;
2544                         case 8:
2545                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2549                                 break;
2550                         case 9:
2551                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2555                                 break;
2556                         case 10:
2557                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2561                                 break;
2562                         case 11:
2563                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2567                                 break;
2568                         case 12:
2569                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2573                                 break;
2574                         case 13:
2575                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2579                                 break;
2580                         case 14:
2581                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2585                                 break;
2586                         default:
2587                                 gb_tile_moden = 0;
2588                                 break;
2589                         }
2590                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592                 }
2593         } else if (num_pipe_configs == 8) {
2594                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595                         switch (reg_offset) {
2596                         case 0:
2597                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601                                 break;
2602                         case 1:
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607                                 break;
2608                         case 2:
2609                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613                                 break;
2614                         case 3:
2615                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619                                 break;
2620                         case 4:
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                                  TILE_SPLIT(split_equal_to_row_size));
2625                                 break;
2626                         case 5:
2627                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630                                 break;
2631                         case 6:
2632                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636                                 break;
2637                         case 7:
2638                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                                  TILE_SPLIT(split_equal_to_row_size));
2642                                 break;
2643                         case 8:
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646                                 break;
2647                         case 9:
2648                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651                                 break;
2652                         case 10:
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                                 break;
2658                         case 11:
2659                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663                                 break;
2664                         case 12:
2665                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669                                 break;
2670                         case 13:
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674                                 break;
2675                         case 14:
2676                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680                                 break;
2681                         case 16:
2682                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686                                 break;
2687                         case 17:
2688                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                                 break;
2693                         case 27:
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697                                 break;
2698                         case 28:
2699                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                                 break;
2704                         case 29:
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709                                 break;
2710                         case 30:
2711                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                                 break;
2716                         default:
2717                                 gb_tile_moden = 0;
2718                                 break;
2719                         }
2720                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722                 }
2723                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724                         switch (reg_offset) {
2725                         case 0:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2730                                 break;
2731                         case 1:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 2:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 3:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 4:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2754                                 break;
2755                         case 5:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2760                                 break;
2761                         case 6:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2766                                 break;
2767                         case 8:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2772                                 break;
2773                         case 9:
2774                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2778                                 break;
2779                         case 10:
2780                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2784                                 break;
2785                         case 11:
2786                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2790                                 break;
2791                         case 12:
2792                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2796                                 break;
2797                         case 13:
2798                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2802                                 break;
2803                         case 14:
2804                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2808                                 break;
2809                         default:
2810                                 gb_tile_moden = 0;
2811                                 break;
2812                         }
2813                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815                 }
2816         } else if (num_pipe_configs == 4) {
2817                 if (num_rbs == 4) {
2818                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819                                 switch (reg_offset) {
2820                                 case 0:
2821                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825                                         break;
2826                                 case 1:
2827                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831                                         break;
2832                                 case 2:
2833                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837                                         break;
2838                                 case 3:
2839                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843                                         break;
2844                                 case 4:
2845                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                                          TILE_SPLIT(split_equal_to_row_size));
2849                                         break;
2850                                 case 5:
2851                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854                                         break;
2855                                 case 6:
2856                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860                                         break;
2861                                 case 7:
2862                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                                          TILE_SPLIT(split_equal_to_row_size));
2866                                         break;
2867                                 case 8:
2868                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870                                         break;
2871                                 case 9:
2872                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875                                         break;
2876                                 case 10:
2877                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881                                         break;
2882                                 case 11:
2883                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887                                         break;
2888                                 case 12:
2889                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893                                         break;
2894                                 case 13:
2895                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898                                         break;
2899                                 case 14:
2900                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904                                         break;
2905                                 case 16:
2906                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910                                         break;
2911                                 case 17:
2912                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                                         break;
2917                                 case 27:
2918                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921                                         break;
2922                                 case 28:
2923                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927                                         break;
2928                                 case 29:
2929                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                                         break;
2934                                 case 30:
2935                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                                         break;
2940                                 default:
2941                                         gb_tile_moden = 0;
2942                                         break;
2943                                 }
2944                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946                         }
2947                 } else if (num_rbs < 4) {
2948                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949                                 switch (reg_offset) {
2950                                 case 0:
2951                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955                                         break;
2956                                 case 1:
2957                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961                                         break;
2962                                 case 2:
2963                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967                                         break;
2968                                 case 3:
2969                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973                                         break;
2974                                 case 4:
2975                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978                                                          TILE_SPLIT(split_equal_to_row_size));
2979                                         break;
2980                                 case 5:
2981                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984                                         break;
2985                                 case 6:
2986                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990                                         break;
2991                                 case 7:
2992                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995                                                          TILE_SPLIT(split_equal_to_row_size));
2996                                         break;
2997                                 case 8:
2998                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000                                         break;
3001                                 case 9:
3002                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005                                         break;
3006                                 case 10:
3007                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                                         break;
3012                                 case 11:
3013                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                                         break;
3018                                 case 12:
3019                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023                                         break;
3024                                 case 13:
3025                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028                                         break;
3029                                 case 14:
3030                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034                                         break;
3035                                 case 16:
3036                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040                                         break;
3041                                 case 17:
3042                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046                                         break;
3047                                 case 27:
3048                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051                                         break;
3052                                 case 28:
3053                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057                                         break;
3058                                 case 29:
3059                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063                                         break;
3064                                 case 30:
3065                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                                         break;
3070                                 default:
3071                                         gb_tile_moden = 0;
3072                                         break;
3073                                 }
3074                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076                         }
3077                 }
3078                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079                         switch (reg_offset) {
3080                         case 0:
3081                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3085                                 break;
3086                         case 1:
3087                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3091                                 break;
3092                         case 2:
3093                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3097                                 break;
3098                         case 3:
3099                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3103                                 break;
3104                         case 4:
3105                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3109                                 break;
3110                         case 5:
3111                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3115                                 break;
3116                         case 6:
3117                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3121                                 break;
3122                         case 8:
3123                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3127                                 break;
3128                         case 9:
3129                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3133                                 break;
3134                         case 10:
3135                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3139                                 break;
3140                         case 11:
3141                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3145                                 break;
3146                         case 12:
3147                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3151                                 break;
3152                         case 13:
3153                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3157                                 break;
3158                         case 14:
3159                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3163                                 break;
3164                         default:
3165                                 gb_tile_moden = 0;
3166                                 break;
3167                         }
3168                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170                 }
3171         } else if (num_pipe_configs == 2) {
3172                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173                         switch (reg_offset) {
3174                         case 0:
3175                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179                                 break;
3180                         case 1:
3181                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185                                 break;
3186                         case 2:
3187                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191                                 break;
3192                         case 3:
3193                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3196                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197                                 break;
3198                         case 4:
3199                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                                  TILE_SPLIT(split_equal_to_row_size));
3203                                 break;
3204                         case 5:
3205                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208                                 break;
3209                         case 6:
3210                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214                                 break;
3215                         case 7:
3216                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3219                                                  TILE_SPLIT(split_equal_to_row_size));
3220                                 break;
3221                         case 8:
3222                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223                                                  PIPE_CONFIG(ADDR_SURF_P2));
3224                                 break;
3225                         case 9:
3226                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228                                                  PIPE_CONFIG(ADDR_SURF_P2));
3229                                 break;
3230                         case 10:
3231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                                 break;
3236                         case 11:
3237                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3240                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241                                 break;
3242                         case 12:
3243                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                                 break;
3248                         case 13:
3249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252                                 break;
3253                         case 14:
3254                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258                                 break;
3259                         case 16:
3260                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264                                 break;
3265                         case 17:
3266                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                                 break;
3271                         case 27:
3272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274                                                  PIPE_CONFIG(ADDR_SURF_P2));
3275                                 break;
3276                         case 28:
3277                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281                                 break;
3282                         case 29:
3283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287                                 break;
3288                         case 30:
3289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293                                 break;
3294                         default:
3295                                 gb_tile_moden = 0;
3296                                 break;
3297                         }
3298                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300                 }
3301                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302                         switch (reg_offset) {
3303                         case 0:
3304                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3308                                 break;
3309                         case 1:
3310                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3314                                 break;
3315                         case 2:
3316                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3320                                 break;
3321                         case 3:
3322                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3326                                 break;
3327                         case 4:
3328                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3332                                 break;
3333                         case 5:
3334                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3338                                 break;
3339                         case 6:
3340                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3344                                 break;
3345                         case 8:
3346                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3350                                 break;
3351                         case 9:
3352                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3356                                 break;
3357                         case 10:
3358                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3362                                 break;
3363                         case 11:
3364                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3368                                 break;
3369                         case 12:
3370                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3374                                 break;
3375                         case 13:
3376                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3380                                 break;
3381                         case 14:
3382                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3386                                 break;
3387                         default:
3388                                 gb_tile_moden = 0;
3389                                 break;
3390                         }
3391                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393                 }
3394         } else
3395                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410                              u32 se_num, u32 sh_num)
3411 {
3412         u32 data = INSTANCE_BROADCAST_WRITES;
3413
3414         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416         else if (se_num == 0xffffffff)
3417                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418         else if (sh_num == 0xffffffff)
3419                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420         else
3421                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422         WREG32(GRBM_GFX_INDEX, data);
3423 }
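/*
 * For reference, the usage pattern seen later in this file (e.g. in
 * cik_setup_rb()) is:
 *   cik_select_se_sh(rdev, se, 0xffffffff);  - target every SH of one SE
 *   ...instanced register accesses...
 *   cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);  - restore broadcast
 * so that subsequent register writes reach all instances again.
 */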
3424
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435         u32 i, mask = 0;
3436
3437         for (i = 0; i < bit_width; i++) {
3438                 mask <<= 1;
3439                 mask |= 1;
3440         }
3441         return mask;
3442 }
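/*
 * For reference: for bit_width < 32 the loop above is equivalent to the
 * closed form (bit_width ? ((1U << bit_width) - 1) : 0), e.g.
 * cik_create_bitmask(4) returns 0xf.
 */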
3443
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num_per_se: max RBs (render backends) per SE
3449  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456                               u32 max_rb_num_per_se,
3457                               u32 sh_per_se)
3458 {
3459         u32 data, mask;
3460
3461         data = RREG32(CC_RB_BACKEND_DISABLE);
3462         if (data & 1)
3463                 data &= BACKEND_DISABLE_MASK;
3464         else
3465                 data = 0;
3466         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467
3468         data >>= BACKEND_DISABLE_SHIFT;
3469
3470         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471
3472         return data & mask;
3473 }
3474
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num_per_se: max RBs (render backends) per SE
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486                          u32 se_num, u32 sh_per_se,
3487                          u32 max_rb_num_per_se)
3488 {
3489         int i, j;
3490         u32 data, mask;
3491         u32 disabled_rbs = 0;
3492         u32 enabled_rbs = 0;
3493
3494         mutex_lock(&rdev->grbm_idx_mutex);
3495         for (i = 0; i < se_num; i++) {
3496                 for (j = 0; j < sh_per_se; j++) {
3497                         cik_select_se_sh(rdev, i, j);
3498                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499                         if (rdev->family == CHIP_HAWAII)
3500                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501                         else
3502                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503                 }
3504         }
3505         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506         mutex_unlock(&rdev->grbm_idx_mutex);
3507
3508         mask = 1;
3509         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510                 if (!(disabled_rbs & mask))
3511                         enabled_rbs |= mask;
3512                 mask <<= 1;
3513         }
3514
3515         rdev->config.cik.backend_enable_mask = enabled_rbs;
3516
3517         mutex_lock(&rdev->grbm_idx_mutex);
3518         for (i = 0; i < se_num; i++) {
3519                 cik_select_se_sh(rdev, i, 0xffffffff);
3520                 data = 0;
3521                 for (j = 0; j < sh_per_se; j++) {
3522                         switch (enabled_rbs & 3) {
3523                         case 0:
3524                                 if (j == 0)
3525                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526                                 else
3527                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528                                 break;
3529                         case 1:
3530                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531                                 break;
3532                         case 2:
3533                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534                                 break;
3535                         case 3:
3536                         default:
3537                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538                                 break;
3539                         }
3540                         enabled_rbs >>= 2;
3541                 }
3542                 WREG32(PA_SC_RASTER_CONFIG, data);
3543         }
3544         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545         mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
3547
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559         u32 mc_shared_chmap, mc_arb_ramcfg;
3560         u32 hdp_host_path_cntl;
3561         u32 tmp;
3562         int i, j;
3563
3564         switch (rdev->family) {
3565         case CHIP_BONAIRE:
3566                 rdev->config.cik.max_shader_engines = 2;
3567                 rdev->config.cik.max_tile_pipes = 4;
3568                 rdev->config.cik.max_cu_per_sh = 7;
3569                 rdev->config.cik.max_sh_per_se = 1;
3570                 rdev->config.cik.max_backends_per_se = 2;
3571                 rdev->config.cik.max_texture_channel_caches = 4;
3572                 rdev->config.cik.max_gprs = 256;
3573                 rdev->config.cik.max_gs_threads = 32;
3574                 rdev->config.cik.max_hw_contexts = 8;
3575
3576                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581                 break;
3582         case CHIP_HAWAII:
3583                 rdev->config.cik.max_shader_engines = 4;
3584                 rdev->config.cik.max_tile_pipes = 16;
3585                 rdev->config.cik.max_cu_per_sh = 11;
3586                 rdev->config.cik.max_sh_per_se = 1;
3587                 rdev->config.cik.max_backends_per_se = 4;
3588                 rdev->config.cik.max_texture_channel_caches = 16;
3589                 rdev->config.cik.max_gprs = 256;
3590                 rdev->config.cik.max_gs_threads = 32;
3591                 rdev->config.cik.max_hw_contexts = 8;
3592
3593                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598                 break;
3599         case CHIP_KAVERI:
3600                 rdev->config.cik.max_shader_engines = 1;
3601                 rdev->config.cik.max_tile_pipes = 4;
3602                 if ((rdev->pdev->device == 0x1304) ||
3603                     (rdev->pdev->device == 0x1305) ||
3604                     (rdev->pdev->device == 0x130C) ||
3605                     (rdev->pdev->device == 0x130F) ||
3606                     (rdev->pdev->device == 0x1310) ||
3607                     (rdev->pdev->device == 0x1311) ||
3608                     (rdev->pdev->device == 0x131C)) {
3609                         rdev->config.cik.max_cu_per_sh = 8;
3610                         rdev->config.cik.max_backends_per_se = 2;
3611                 } else if ((rdev->pdev->device == 0x1309) ||
3612                            (rdev->pdev->device == 0x130A) ||
3613                            (rdev->pdev->device == 0x130D) ||
3614                            (rdev->pdev->device == 0x1313) ||
3615                            (rdev->pdev->device == 0x131D)) {
3616                         rdev->config.cik.max_cu_per_sh = 6;
3617                         rdev->config.cik.max_backends_per_se = 2;
3618                 } else if ((rdev->pdev->device == 0x1306) ||
3619                            (rdev->pdev->device == 0x1307) ||
3620                            (rdev->pdev->device == 0x130B) ||
3621                            (rdev->pdev->device == 0x130E) ||
3622                            (rdev->pdev->device == 0x1315) ||
3623                            (rdev->pdev->device == 0x1318) ||
3624                            (rdev->pdev->device == 0x131B)) {
3625                         rdev->config.cik.max_cu_per_sh = 4;
3626                         rdev->config.cik.max_backends_per_se = 1;
3627                 } else {
3628                         rdev->config.cik.max_cu_per_sh = 3;
3629                         rdev->config.cik.max_backends_per_se = 1;
3630                 }
3631                 rdev->config.cik.max_sh_per_se = 1;
3632                 rdev->config.cik.max_texture_channel_caches = 4;
3633                 rdev->config.cik.max_gprs = 256;
3634                 rdev->config.cik.max_gs_threads = 16;
3635                 rdev->config.cik.max_hw_contexts = 8;
3636
3637                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642                 break;
3643         case CHIP_KABINI:
3644         case CHIP_MULLINS:
3645         default:
3646                 rdev->config.cik.max_shader_engines = 1;
3647                 rdev->config.cik.max_tile_pipes = 2;
3648                 rdev->config.cik.max_cu_per_sh = 2;
3649                 rdev->config.cik.max_sh_per_se = 1;
3650                 rdev->config.cik.max_backends_per_se = 1;
3651                 rdev->config.cik.max_texture_channel_caches = 2;
3652                 rdev->config.cik.max_gprs = 256;
3653                 rdev->config.cik.max_gs_threads = 16;
3654                 rdev->config.cik.max_hw_contexts = 8;
3655
3656                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661                 break;
3662         }
3663
3664         /* Initialize HDP */
3665         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666                 WREG32((0x2c14 + j), 0x00000000);
3667                 WREG32((0x2c18 + j), 0x00000000);
3668                 WREG32((0x2c1c + j), 0x00000000);
3669                 WREG32((0x2c20 + j), 0x00000000);
3670                 WREG32((0x2c24 + j), 0x00000000);
3671         }
3672
3673         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674         WREG32(SRBM_INT_CNTL, 0x1);
3675         WREG32(SRBM_INT_ACK, 0x1);
3676
3677         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678
3679         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681
3682         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683         rdev->config.cik.mem_max_burst_length_bytes = 256;
3684         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686         if (rdev->config.cik.mem_row_size_in_kb > 4)
3687                 rdev->config.cik.mem_row_size_in_kb = 4;
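        /*
         * Worked example of the NOOFCOLS decode above: a field value of 0
         * gives 4 * (1 << 8) / 1024 = 1 KB per row, 1 gives 2 KB, 2 gives
         * 4 KB, and anything larger is clamped back down to 4 KB.
         */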
3688         /* XXX use MC settings? */
3689         rdev->config.cik.shader_engine_tile_size = 32;
3690         rdev->config.cik.num_gpus = 1;
3691         rdev->config.cik.multi_gpu_tile_size = 64;
3692
3693         /* fix up row size */
3694         gb_addr_config &= ~ROW_SIZE_MASK;
3695         switch (rdev->config.cik.mem_row_size_in_kb) {
3696         case 1:
3697         default:
3698                 gb_addr_config |= ROW_SIZE(0);
3699                 break;
3700         case 2:
3701                 gb_addr_config |= ROW_SIZE(1);
3702                 break;
3703         case 4:
3704                 gb_addr_config |= ROW_SIZE(2);
3705                 break;
3706         }
3707
3708         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3709          * not have bank info, so create a custom tiling dword.
3710          * bits 3:0   num_pipes
3711          * bits 7:4   num_banks
3712          * bits 11:8  group_size
3713          * bits 15:12 row_size
3714          */
3715         rdev->config.cik.tile_config = 0;
3716         switch (rdev->config.cik.num_tile_pipes) {
3717         case 1:
3718                 rdev->config.cik.tile_config |= (0 << 0);
3719                 break;
3720         case 2:
3721                 rdev->config.cik.tile_config |= (1 << 0);
3722                 break;
3723         case 4:
3724                 rdev->config.cik.tile_config |= (2 << 0);
3725                 break;
3726         case 8:
3727         default:
3728                 /* XXX what about 12? */
3729                 rdev->config.cik.tile_config |= (3 << 0);
3730                 break;
3731         }
3732         rdev->config.cik.tile_config |=
3733                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734         rdev->config.cik.tile_config |=
3735                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736         rdev->config.cik.tile_config |=
3737                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
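        /*
         * Illustrative decode of the tiling dword built above (the field
         * values are hypothetical): 4 pipes (encoded as 2), a bank field of
         * 2, an interleave/group field of 0 and a row-size field of 2 give
         * (2 << 12) | (0 << 8) | (2 << 4) | 2 = 0x2022.
         */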
3738
3739         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747
3748         cik_tiling_mode_table_init(rdev);
3749
3750         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751                      rdev->config.cik.max_sh_per_se,
3752                      rdev->config.cik.max_backends_per_se);
3753
3754         rdev->config.cik.active_cus = 0;
3755         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757                         rdev->config.cik.active_cus +=
3758                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759                 }
3760         }
3761
3762         /* set HW defaults for 3D engine */
3763         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764
3765         mutex_lock(&rdev->grbm_idx_mutex);
3766         /*
3767          * make sure that the following register writes will be broadcast
3768          * to all the shaders
3769          */
3770         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771         WREG32(SX_DEBUG_1, 0x20);
3772
3773         WREG32(TA_CNTL_AUX, 0x00010000);
3774
3775         tmp = RREG32(SPI_CONFIG_CNTL);
3776         tmp |= 0x03000000;
3777         WREG32(SPI_CONFIG_CNTL, tmp);
3778
3779         WREG32(SQ_CONFIG, 1);
3780
3781         WREG32(DB_DEBUG, 0);
3782
3783         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784         tmp |= 0x00000400;
3785         WREG32(DB_DEBUG2, tmp);
3786
3787         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788         tmp |= 0x00020200;
3789         WREG32(DB_DEBUG3, tmp);
3790
3791         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792         tmp |= 0x00018208;
3793         WREG32(CB_HW_CONTROL, tmp);
3794
3795         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796
3797         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801
3802         WREG32(VGT_NUM_INSTANCES, 1);
3803
3804         WREG32(CP_PERFMON_CNTL, 0);
3805
3806         WREG32(SQ_CONFIG, 0);
3807
3808         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809                                           FORCE_EOV_MAX_REZ_CNT(255)));
3810
3811         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813
3814         WREG32(VGT_GS_VERTEX_REUSE, 16);
3815         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816
3817         tmp = RREG32(HDP_MISC_CNTL);
3818         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819         WREG32(HDP_MISC_CNTL, tmp);
3820
3821         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823
3824         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826         mutex_unlock(&rdev->grbm_idx_mutex);
3827
3828         udelay(50);
3829 }
3830
3831 /*
3832  * GPU scratch registers helpers function.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy interface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846         int i;
3847
3848         rdev->scratch.num_reg = 7;
3849         rdev->scratch.reg_base = SCRATCH_REG0;
3850         for (i = 0; i < rdev->scratch.num_reg; i++) {
3851                 rdev->scratch.free[i] = true;
3852                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853         }
3854 }
3855
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume().
3865  * Returns 0 on success, error on failure.
3866  */
3867 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3868 {
3869         uint32_t scratch;
3870         uint32_t tmp = 0;
3871         unsigned i;
3872         int r;
3873
3874         r = radeon_scratch_get(rdev, &scratch);
3875         if (r) {
3876                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3877                 return r;
3878         }
3879         WREG32(scratch, 0xCAFEDEAD);
3880         r = radeon_ring_lock(rdev, ring, 3);
3881         if (r) {
3882                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3883                 radeon_scratch_free(rdev, scratch);
3884                 return r;
3885         }
3886         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3887         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3888         radeon_ring_write(ring, 0xDEADBEEF);
3889         radeon_ring_unlock_commit(rdev, ring, false);
3890
3891         for (i = 0; i < rdev->usec_timeout; i++) {
3892                 tmp = RREG32(scratch);
3893                 if (tmp == 0xDEADBEEF)
3894                         break;
3895                 DRM_UDELAY(1);
3896         }
3897         if (i < rdev->usec_timeout) {
3898                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3899         } else {
3900                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3901                           ring->idx, scratch, tmp);
3902                 r = -EINVAL;
3903         }
3904         radeon_scratch_free(rdev, scratch);
3905         return r;
3906 }
3907
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917                                        int ridx)
3918 {
3919         struct radeon_ring *ring = &rdev->ring[ridx];
3920         u32 ref_and_mask;
3921
3922         switch (ring->idx) {
3923         case CAYMAN_RING_TYPE_CP1_INDEX:
3924         case CAYMAN_RING_TYPE_CP2_INDEX:
3925         default:
3926                 switch (ring->me) {
3927                 case 0:
3928                         ref_and_mask = CP2 << ring->pipe;
3929                         break;
3930                 case 1:
3931                         ref_and_mask = CP6 << ring->pipe;
3932                         break;
3933                 default:
3934                         return;
3935                 }
3936                 break;
3937         case RADEON_RING_TYPE_GFX_INDEX:
3938                 ref_and_mask = CP0;
3939                 break;
3940         }
3941
3942         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948         radeon_ring_write(ring, ref_and_mask);
3949         radeon_ring_write(ring, ref_and_mask);
3950         radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
3952
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequence number on the gfx ring and flushes
3960  * GPU caches.
3961  */
3962 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3963                              struct radeon_fence *fence)
3964 {
3965         struct radeon_ring *ring = &rdev->ring[fence->ring];
3966         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3967
3968         /* Workaround for cache flush problems. First send a dummy EOP
3969          * event down the pipe with seq one below.
3970          */
3971         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3972         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3973                                  EOP_TC_ACTION_EN |
3974                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3975                                  EVENT_INDEX(5)));
3976         radeon_ring_write(ring, addr & 0xfffffffc);
3977         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3978                                 DATA_SEL(1) | INT_SEL(0));
3979         radeon_ring_write(ring, fence->seq - 1);
3980         radeon_ring_write(ring, 0);
3981
3982         /* Then send the real EOP event down the pipe. */
3983         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3984         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3985                                  EOP_TC_ACTION_EN |
3986                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3987                                  EVENT_INDEX(5)));
3988         radeon_ring_write(ring, addr & 0xfffffffc);
3989         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3990         radeon_ring_write(ring, fence->seq);
3991         radeon_ring_write(ring, 0);
3992 }
3993
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequence number on the compute ring and flushes
4001  * GPU caches.
4002  */
4003 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
4004                                  struct radeon_fence *fence)
4005 {
4006         struct radeon_ring *ring = &rdev->ring[fence->ring];
4007         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4008
4009         /* RELEASE_MEM - flush caches, send int */
4010         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4011         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
4012                                  EOP_TC_ACTION_EN |
4013                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4014                                  EVENT_INDEX(5)));
4015         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
4016         radeon_ring_write(ring, addr & 0xfffffffc);
4017         radeon_ring_write(ring, upper_32_bits(addr));
4018         radeon_ring_write(ring, fence->seq);
4019         radeon_ring_write(ring, 0);
4020 }
4021
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a semaphore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034                              struct radeon_ring *ring,
4035                              struct radeon_semaphore *semaphore,
4036                              bool emit_wait)
4037 {
4038         uint64_t addr = semaphore->gpu_addr;
4039         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040
4041         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042         radeon_ring_write(ring, lower_32_bits(addr));
4043         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044
4045         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046                 /* Prevent the PFP from running ahead of the semaphore wait */
4047                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048                 radeon_ring_write(ring, 0x0);
4049         }
4050
4051         return true;
4052 }
4053
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU pages using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
4067 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4068                                     uint64_t src_offset, uint64_t dst_offset,
4069                                     unsigned num_gpu_pages,
4070                                     struct reservation_object *resv)
4071 {
4072         struct radeon_fence *fence;
4073         struct radeon_sync sync;
4074         int ring_index = rdev->asic->copy.blit_ring_index;
4075         struct radeon_ring *ring = &rdev->ring[ring_index];
4076         u32 size_in_bytes, cur_size_in_bytes, control;
4077         int i, num_loops;
4078         int r = 0;
4079
4080         radeon_sync_create(&sync);
4081
4082         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4083         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
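        /*
         * Example of the chunking above, for a hypothetical transfer size:
         * copying 4096 GPU pages of 4 KB (16 MiB) with the 0x1fffff byte cap
         * per PACKET3_DMA_DATA packet gives DIV_ROUND_UP(16777216, 2097151)
         * = 9 loops.
         */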
4084         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4085         if (r) {
4086                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4087                 radeon_sync_free(rdev, &sync, NULL);
4088                 return ERR_PTR(r);
4089         }
4090
4091         radeon_sync_resv(rdev, &sync, resv, false);
4092         radeon_sync_rings(rdev, &sync, ring->idx);
4093
4094         for (i = 0; i < num_loops; i++) {
4095                 cur_size_in_bytes = size_in_bytes;
4096                 if (cur_size_in_bytes > 0x1fffff)
4097                         cur_size_in_bytes = 0x1fffff;
4098                 size_in_bytes -= cur_size_in_bytes;
4099                 control = 0;
4100                 if (size_in_bytes == 0)
4101                         control |= PACKET3_DMA_DATA_CP_SYNC;
4102                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4103                 radeon_ring_write(ring, control);
4104                 radeon_ring_write(ring, lower_32_bits(src_offset));
4105                 radeon_ring_write(ring, upper_32_bits(src_offset));
4106                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4107                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4108                 radeon_ring_write(ring, cur_size_in_bytes);
4109                 src_offset += cur_size_in_bytes;
4110                 dst_offset += cur_size_in_bytes;
4111         }
4112
4113         r = radeon_fence_emit(rdev, &fence, ring->idx);
4114         if (r) {
4115                 radeon_ring_unlock_undo(rdev, ring);
4116                 radeon_sync_free(rdev, &sync, NULL);
4117                 return ERR_PTR(r);
4118         }
4119
4120         radeon_ring_unlock_commit(rdev, ring, false);
4121         radeon_sync_free(rdev, &sync, fence);
4122
4123         return fence;
4124 }
4125
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits a DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * scheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
4141 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4142 {
4143         struct radeon_ring *ring = &rdev->ring[ib->ring];
4144         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4145         u32 header, control = INDIRECT_BUFFER_VALID;
4146
4147         if (ib->is_const_ib) {
4148                 /* set switch buffer packet before const IB */
4149                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4150                 radeon_ring_write(ring, 0);
4151
4152                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4153         } else {
4154                 u32 next_rptr;
4155                 if (ring->rptr_save_reg) {
4156                         next_rptr = ring->wptr + 3 + 4;
4157                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4158                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4159                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4160                         radeon_ring_write(ring, next_rptr);
4161                 } else if (rdev->wb.enabled) {
4162                         next_rptr = ring->wptr + 5 + 4;
4163                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4164                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4165                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4166                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4167                         radeon_ring_write(ring, next_rptr);
4168                 }
4169
4170                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4171         }
4172
4173         control |= ib->length_dw | (vm_id << 24);
4174
4175         radeon_ring_write(ring, header);
4176         radeon_ring_write(ring,
4177 #ifdef __BIG_ENDIAN
4178                           (2 << 0) |
4179 #endif
4180                           (ib->gpu_addr & 0xFFFFFFFC));
4181         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4182         radeon_ring_write(ring, control);
4183 }
4184
4185 /**
4186  * cik_ib_test - basic gfx ring IB test
4187  *
4188  * @rdev: radeon_device pointer
4189  * @ring: radeon_ring structure holding ring information
4190  *
4191  * Allocate an IB and execute it on the gfx ring (CIK).
4192  * Provides a basic gfx ring test to verify that IBs are working.
4193  * Returns 0 on success, error on failure.
4194  */
4195 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4196 {
4197         struct radeon_ib ib;
4198         uint32_t scratch;
4199         uint32_t tmp = 0;
4200         unsigned i;
4201         int r;
4202
4203         r = radeon_scratch_get(rdev, &scratch);
4204         if (r) {
4205                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4206                 return r;
4207         }
4208         WREG32(scratch, 0xCAFEDEAD);
4209         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4210         if (r) {
4211                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4212                 radeon_scratch_free(rdev, scratch);
4213                 return r;
4214         }
4215         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4216         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4217         ib.ptr[2] = 0xDEADBEEF;
4218         ib.length_dw = 3;
4219         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4220         if (r) {
4221                 radeon_scratch_free(rdev, scratch);
4222                 radeon_ib_free(rdev, &ib);
4223                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4224                 return r;
4225         }
4226         r = radeon_fence_wait(ib.fence, false);
4227         if (r) {
4228                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4229                 radeon_scratch_free(rdev, scratch);
4230                 radeon_ib_free(rdev, &ib);
4231                 return r;
4232         }
4233         for (i = 0; i < rdev->usec_timeout; i++) {
4234                 tmp = RREG32(scratch);
4235                 if (tmp == 0xDEADBEEF)
4236                         break;
4237                 DRM_UDELAY(1);
4238         }
4239         if (i < rdev->usec_timeout) {
4240                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4241         } else {
4242                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4243                           scratch, tmp);
4244                 r = -EINVAL;
4245         }
4246         radeon_scratch_free(rdev, scratch);
4247         radeon_ib_free(rdev, &ib);
4248         return r;
4249 }
4250
4251 /*
4252  * CP.
4253  * On CIK, gfx and compute now have independent command processors.
4254  *
4255  * GFX
4256  * Gfx consists of a single ring and can process both gfx jobs and
4257  * compute jobs.  The gfx CP consists of three microengines (ME):
4258  * PFP - Pre-Fetch Parser
4259  * ME - Micro Engine
4260  * CE - Constant Engine
4261  * The PFP and ME make up what is considered the Drawing Engine (DE).
4262  * The CE is an asynchronous engine used for updating buffer descriptors
4263  * used by the DE so that they can be loaded into cache in parallel
4264  * while the DE is processing state update packets.
4265  *
4266  * Compute
4267  * The compute CP consists of two microengines (ME):
4268  * MEC1 - Compute MicroEngine 1
4269  * MEC2 - Compute MicroEngine 2
4270  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4271  * The queues are exposed to userspace and are programmed directly
4272  * by the compute runtime.
4273  */
4274 /**
4275  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4276  *
4277  * @rdev: radeon_device pointer
4278  * @enable: enable or disable the MEs
4279  *
4280  * Halts or unhalts the gfx MEs.
4281  */
4282 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4283 {
4284         if (enable)
4285                 WREG32(CP_ME_CNTL, 0);
4286         else {
4287                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4288                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4289                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4290                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4291         }
4292         udelay(50);
4293 }
4294
4295 /**
4296  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4297  *
4298  * @rdev: radeon_device pointer
4299  *
4300  * Loads the gfx PFP, ME, and CE ucode.
4301  * Returns 0 for success, -EINVAL if the ucode is not available.
4302  */
4303 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4304 {
4305         int i;
4306
4307         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4308                 return -EINVAL;
4309
4310         cik_cp_gfx_enable(rdev, false);
4311
4312         if (rdev->new_fw) {
4313                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4314                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4315                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4316                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4317                 const struct gfx_firmware_header_v1_0 *me_hdr =
4318                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4319                 const __le32 *fw_data;
4320                 u32 fw_size;
4321
4322                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4323                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4324                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4325
4326                 /* PFP */
4327                 fw_data = (const __le32 *)
4328                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4329                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4330                 WREG32(CP_PFP_UCODE_ADDR, 0);
4331                 for (i = 0; i < fw_size; i++)
4332                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4333                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4334
4335                 /* CE */
4336                 fw_data = (const __le32 *)
4337                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4338                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4339                 WREG32(CP_CE_UCODE_ADDR, 0);
4340                 for (i = 0; i < fw_size; i++)
4341                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4342                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4343
4344                 /* ME */
4345                 fw_data = (const __le32 *)
4346                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4347                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4348                 WREG32(CP_ME_RAM_WADDR, 0);
4349                 for (i = 0; i < fw_size; i++)
4350                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4351                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4352                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4353         } else {
4354                 const __be32 *fw_data;
4355
4356                 /* PFP */
4357                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4358                 WREG32(CP_PFP_UCODE_ADDR, 0);
4359                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4360                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4361                 WREG32(CP_PFP_UCODE_ADDR, 0);
4362
4363                 /* CE */
4364                 fw_data = (const __be32 *)rdev->ce_fw->data;
4365                 WREG32(CP_CE_UCODE_ADDR, 0);
4366                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4367                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4368                 WREG32(CP_CE_UCODE_ADDR, 0);
4369
4370                 /* ME */
4371                 fw_data = (const __be32 *)rdev->me_fw->data;
4372                 WREG32(CP_ME_RAM_WADDR, 0);
4373                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4374                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4375                 WREG32(CP_ME_RAM_WADDR, 0);
4376         }
4377
4378         return 0;
4379 }
4380
4381 /**
4382  * cik_cp_gfx_start - start the gfx ring
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Enables the ring and loads the clear state context and other
4387  * packets required to init the ring.
4388  * Returns 0 for success, error for failure.
4389  */
4390 static int cik_cp_gfx_start(struct radeon_device *rdev)
4391 {
4392         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4393         int r, i;
4394
4395         /* init the CP */
4396         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4397         WREG32(CP_ENDIAN_SWAP, 0);
4398         WREG32(CP_DEVICE_ID, 1);
4399
4400         cik_cp_gfx_enable(rdev, true);
4401
4402         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4403         if (r) {
4404                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4405                 return r;
4406         }
4407
4408         /* init the CE partitions.  CE only used for gfx on CIK */
4409         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4410         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4411         radeon_ring_write(ring, 0x8000);
4412         radeon_ring_write(ring, 0x8000);
4413
4414         /* setup clear context state */
4415         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4416         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4417
4418         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4419         radeon_ring_write(ring, 0x80000000);
4420         radeon_ring_write(ring, 0x80000000);
4421
4422         for (i = 0; i < cik_default_size; i++)
4423                 radeon_ring_write(ring, cik_default_state[i]);
4424
4425         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4426         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4427
4428         /* set clear context state */
4429         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4430         radeon_ring_write(ring, 0);
4431
4432         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4433         radeon_ring_write(ring, 0x00000316);
4434         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4435         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4436
4437         radeon_ring_unlock_commit(rdev, ring, false);
4438
4439         return 0;
4440 }
4441
4442 /**
4443  * cik_cp_gfx_fini - stop the gfx ring
4444  *
4445  * @rdev: radeon_device pointer
4446  *
4447  * Stop the gfx ring and tear down the driver ring
4448  * info.
4449  */
4450 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4451 {
4452         cik_cp_gfx_enable(rdev, false);
4453         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4454 }
4455
4456 /**
4457  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4458  *
4459  * @rdev: radeon_device pointer
4460  *
4461  * Program the location and size of the gfx ring buffer
4462  * and test it to make sure it's working.
4463  * Returns 0 for success, error for failure.
4464  */
4465 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4466 {
4467         struct radeon_ring *ring;
4468         u32 tmp;
4469         u32 rb_bufsz;
4470         u64 rb_addr;
4471         int r;
4472
4473         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4474         if (rdev->family != CHIP_HAWAII)
4475                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4476
4477         /* Set the write pointer delay */
4478         WREG32(CP_RB_WPTR_DELAY, 0);
4479
4480         /* set the RB to use vmid 0 */
4481         WREG32(CP_RB_VMID, 0);
4482
4483         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4484
4485         /* ring 0 - compute and gfx */
4486         /* Set ring buffer size */
4487         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4488         rb_bufsz = order_base_2(ring->ring_size / 8);
4489         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
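        /*
         * For a hypothetical 1 MiB ring this works out to
         * rb_bufsz = order_base_2(1048576 / 8) = 17 and, with 4 KB GPU pages,
         * a block size field of order_base_2(4096 / 8) = 9, i.e. tmp = 0x911
         * before the optional byte-swap bit below.
         */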
4490 #ifdef __BIG_ENDIAN
4491         tmp |= BUF_SWAP_32BIT;
4492 #endif
4493         WREG32(CP_RB0_CNTL, tmp);
4494
4495         /* Initialize the ring buffer's read and write pointers */
4496         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4497         ring->wptr = 0;
4498         WREG32(CP_RB0_WPTR, ring->wptr);
4499
4500         /* set the wb address whether it's enabled or not */
4501         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4502         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4503
4504         /* scratch register shadowing is no longer supported */
4505         WREG32(SCRATCH_UMSK, 0);
4506
4507         if (!rdev->wb.enabled)
4508                 tmp |= RB_NO_UPDATE;
4509
4510         mdelay(1);
4511         WREG32(CP_RB0_CNTL, tmp);
4512
4513         rb_addr = ring->gpu_addr >> 8;
4514         WREG32(CP_RB0_BASE, rb_addr);
4515         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4516
4517         /* start the ring */
4518         cik_cp_gfx_start(rdev);
4519         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4520         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4521         if (r) {
4522                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4523                 return r;
4524         }
4525
4526         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4527                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4528
4529         return 0;
4530 }
4531
4532 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4533                      struct radeon_ring *ring)
4534 {
4535         u32 rptr;
4536
4537         if (rdev->wb.enabled)
4538                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4539         else
4540                 rptr = RREG32(CP_RB0_RPTR);
4541
4542         return rptr;
4543 }
4544
4545 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4546                      struct radeon_ring *ring)
4547 {
4548         u32 wptr;
4549
4550         wptr = RREG32(CP_RB0_WPTR);
4551
4552         return wptr;
4553 }
4554
4555 void cik_gfx_set_wptr(struct radeon_device *rdev,
4556                       struct radeon_ring *ring)
4557 {
4558         WREG32(CP_RB0_WPTR, ring->wptr);
4559         (void)RREG32(CP_RB0_WPTR);
4560 }
4561
4562 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4563                          struct radeon_ring *ring)
4564 {
4565         u32 rptr;
4566
4567         if (rdev->wb.enabled) {
4568                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4569         } else {
4570                 mutex_lock(&rdev->srbm_mutex);
4571                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4572                 rptr = RREG32(CP_HQD_PQ_RPTR);
4573                 cik_srbm_select(rdev, 0, 0, 0, 0);
4574                 mutex_unlock(&rdev->srbm_mutex);
4575         }
4576
4577         return rptr;
4578 }
4579
4580 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4581                          struct radeon_ring *ring)
4582 {
4583         u32 wptr;
4584
4585         if (rdev->wb.enabled) {
4586                 /* XXX check if swapping is necessary on BE */
4587                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4588         } else {
4589                 mutex_lock(&rdev->srbm_mutex);
4590                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4591                 wptr = RREG32(CP_HQD_PQ_WPTR);
4592                 cik_srbm_select(rdev, 0, 0, 0, 0);
4593                 mutex_unlock(&rdev->srbm_mutex);
4594         }
4595
4596         return wptr;
4597 }
4598
4599 void cik_compute_set_wptr(struct radeon_device *rdev,
4600                           struct radeon_ring *ring)
4601 {
4602         /* XXX check if swapping is necessary on BE */
4603         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4604         WDOORBELL32(ring->doorbell_index, ring->wptr);
4605 }
4606
4607 /**
4608  * cik_cp_compute_enable - enable/disable the compute CP MEs
4609  *
4610  * @rdev: radeon_device pointer
4611  * @enable: enable or disable the MEs
4612  *
4613  * Halts or unhalts the compute MEs.
4614  */
4615 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4616 {
4617         if (enable)
4618                 WREG32(CP_MEC_CNTL, 0);
4619         else {
4620                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4621                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4622                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4623         }
4624         udelay(50);
4625 }
4626
4627 /**
4628  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4629  *
4630  * @rdev: radeon_device pointer
4631  *
4632  * Loads the compute MEC1&2 ucode.
4633  * Returns 0 for success, -EINVAL if the ucode is not available.
4634  */
4635 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4636 {
4637         int i;
4638
4639         if (!rdev->mec_fw)
4640                 return -EINVAL;
4641
4642         cik_cp_compute_enable(rdev, false);
4643
4644         if (rdev->new_fw) {
4645                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4646                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4647                 const __le32 *fw_data;
4648                 u32 fw_size;
4649
4650                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4651
4652                 /* MEC1 */
4653                 fw_data = (const __le32 *)
4654                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4655                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4656                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4657                 for (i = 0; i < fw_size; i++)
4658                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4659                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4660
4661                 /* MEC2 */
4662                 if (rdev->family == CHIP_KAVERI) {
4663                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4664                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4665
4666                         fw_data = (const __le32 *)
4667                                 (rdev->mec2_fw->data +
4668                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4669                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4670                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4671                         for (i = 0; i < fw_size; i++)
4672                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4673                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4674                 }
4675         } else {
4676                 const __be32 *fw_data;
4677
4678                 /* MEC1 */
4679                 fw_data = (const __be32 *)rdev->mec_fw->data;
4680                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4681                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4682                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4683                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4684
4685                 if (rdev->family == CHIP_KAVERI) {
4686                         /* MEC2 */
4687                         fw_data = (const __be32 *)rdev->mec_fw->data;
4688                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4689                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4690                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4691                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4692                 }
4693         }
4694
4695         return 0;
4696 }
4697
4698 /**
4699  * cik_cp_compute_start - start the compute queues
4700  *
4701  * @rdev: radeon_device pointer
4702  *
4703  * Enable the compute queues.
4704  * Returns 0 for success, error for failure.
4705  */
4706 static int cik_cp_compute_start(struct radeon_device *rdev)
4707 {
4708         cik_cp_compute_enable(rdev, true);
4709
4710         return 0;
4711 }
4712
4713 /**
4714  * cik_cp_compute_fini - stop the compute queues
4715  *
4716  * @rdev: radeon_device pointer
4717  *
4718  * Stop the compute queues and tear down the driver queue
4719  * info.
4720  */
4721 static void cik_cp_compute_fini(struct radeon_device *rdev)
4722 {
4723         int i, idx, r;
4724
4725         cik_cp_compute_enable(rdev, false);
4726
4727         for (i = 0; i < 2; i++) {
4728                 if (i == 0)
4729                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4730                 else
4731                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4732
4733                 if (rdev->ring[idx].mqd_obj) {
4734                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4735                         if (unlikely(r != 0))
4736                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4737
4738                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4739                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4740
4741                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4742                         rdev->ring[idx].mqd_obj = NULL;
4743                 }
4744         }
4745 }
4746
4747 static void cik_mec_fini(struct radeon_device *rdev)
4748 {
4749         int r;
4750
4751         if (rdev->mec.hpd_eop_obj) {
4752                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4753                 if (unlikely(r != 0))
4754                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4755                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4756                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4757
4758                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4759                 rdev->mec.hpd_eop_obj = NULL;
4760         }
4761 }
4762
4763 #define MEC_HPD_SIZE 2048
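     /* per-pipe EOP buffer size in bytes; cik_mec_init() below allocates
      * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes for the hpd_eop bo
      */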
4764
4765 static int cik_mec_init(struct radeon_device *rdev)
4766 {
4767         int r;
4768         u32 *hpd;
4769
4770         /*
4771          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4772          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4773          * Nonetheless, we assign only 1 pipe because all other pipes will
4774          * be handled by KFD
4775          */
4776         rdev->mec.num_mec = 1;
4777         rdev->mec.num_pipe = 1;
4778         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4779
4780         if (rdev->mec.hpd_eop_obj == NULL) {
4781                 r = radeon_bo_create(rdev,
4782                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4783                                      PAGE_SIZE, true,
4784                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4785                                      &rdev->mec.hpd_eop_obj);
4786                 if (r) {
4787                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4788                         return r;
4789                 }
4790         }
4791
4792         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4793         if (unlikely(r != 0)) {
4794                 cik_mec_fini(rdev);
4795                 return r;
4796         }
4797         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4798                           &rdev->mec.hpd_eop_gpu_addr);
4799         if (r) {
4800                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4801                 cik_mec_fini(rdev);
4802                 return r;
4803         }
4804         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4805         if (r) {
4806                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4807                 cik_mec_fini(rdev);
4808                 return r;
4809         }
4810
4811         /* clear memory.  Not sure if this is required or not */
4812         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4813
4814         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4815         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4816
4817         return 0;
4818 }
4819
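     /* shadow of the per-queue CP HQD (hardware queue descriptor) registers,
      * embedded in the MQD (memory queue descriptor) below
      */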
4820 struct hqd_registers
4821 {
4822         u32 cp_mqd_base_addr;
4823         u32 cp_mqd_base_addr_hi;
4824         u32 cp_hqd_active;
4825         u32 cp_hqd_vmid;
4826         u32 cp_hqd_persistent_state;
4827         u32 cp_hqd_pipe_priority;
4828         u32 cp_hqd_queue_priority;
4829         u32 cp_hqd_quantum;
4830         u32 cp_hqd_pq_base;
4831         u32 cp_hqd_pq_base_hi;
4832         u32 cp_hqd_pq_rptr;
4833         u32 cp_hqd_pq_rptr_report_addr;
4834         u32 cp_hqd_pq_rptr_report_addr_hi;
4835         u32 cp_hqd_pq_wptr_poll_addr;
4836         u32 cp_hqd_pq_wptr_poll_addr_hi;
4837         u32 cp_hqd_pq_doorbell_control;
4838         u32 cp_hqd_pq_wptr;
4839         u32 cp_hqd_pq_control;
4840         u32 cp_hqd_ib_base_addr;
4841         u32 cp_hqd_ib_base_addr_hi;
4842         u32 cp_hqd_ib_rptr;
4843         u32 cp_hqd_ib_control;
4844         u32 cp_hqd_iq_timer;
4845         u32 cp_hqd_iq_rptr;
4846         u32 cp_hqd_dequeue_request;
4847         u32 cp_hqd_dma_offload;
4848         u32 cp_hqd_sema_cmd;
4849         u32 cp_hqd_msg_type;
4850         u32 cp_hqd_atomic0_preop_lo;
4851         u32 cp_hqd_atomic0_preop_hi;
4852         u32 cp_hqd_atomic1_preop_lo;
4853         u32 cp_hqd_atomic1_preop_hi;
4854         u32 cp_hqd_hq_scheduler0;
4855         u32 cp_hqd_hq_scheduler1;
4856         u32 cp_mqd_control;
4857 };
4858
4859 struct bonaire_mqd
4860 {
4861         u32 header;
4862         u32 dispatch_initiator;
4863         u32 dimensions[3];
4864         u32 start_idx[3];
4865         u32 num_threads[3];
4866         u32 pipeline_stat_enable;
4867         u32 perf_counter_enable;
4868         u32 pgm[2];
4869         u32 tba[2];
4870         u32 tma[2];
4871         u32 pgm_rsrc[2];
4872         u32 vmid;
4873         u32 resource_limits;
4874         u32 static_thread_mgmt01[2];
4875         u32 tmp_ring_size;
4876         u32 static_thread_mgmt23[2];
4877         u32 restart[3];
4878         u32 thread_trace_enable;
4879         u32 reserved1;
4880         u32 user_data[16];
4881         u32 vgtcs_invoke_count[2];
4882         struct hqd_registers queue_state;
4883         u32 dequeue_cntr;
4884         u32 interrupt_queue[64];
4885 };
4886
4887 /**
4888  * cik_cp_compute_resume - setup the compute queue registers
4889  *
4890  * @rdev: radeon_device pointer
4891  *
4892  * Program the compute queues and test them to make sure they
4893  * are working.
4894  * Returns 0 for success, error for failure.
4895  */
4896 static int cik_cp_compute_resume(struct radeon_device *rdev)
4897 {
4898         int r, i, j, idx;
4899         u32 tmp;
4900         bool use_doorbell = true;
4901         u64 hqd_gpu_addr;
4902         u64 mqd_gpu_addr;
4903         u64 eop_gpu_addr;
4904         u64 wb_gpu_addr;
4905         u32 *buf;
4906         struct bonaire_mqd *mqd;
4907
4908         r = cik_cp_compute_start(rdev);
4909         if (r)
4910                 return r;
4911
4912         /* fix up chicken bits */
4913         tmp = RREG32(CP_CPF_DEBUG);
4914         tmp |= (1 << 23);
4915         WREG32(CP_CPF_DEBUG, tmp);
4916
4917         /* init the pipes */
4918         mutex_lock(&rdev->srbm_mutex);
4919
4920         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4921
4922         cik_srbm_select(rdev, 0, 0, 0, 0);
4923
4924         /* write the EOP addr */
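             /* the base registers hold a 256-byte aligned address, hence the >> 8 */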
4925         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4926         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4927
4928         /* set the VMID assigned */
4929         WREG32(CP_HPD_EOP_VMID, 0);
4930
4931         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
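             /* with MEC_HPD_SIZE = 2048: 2048 / 8 = 256, order_base_2(256) = 8,
              * so the hw uses 2^(8+1) = 512 dwords = 2048 bytes per EOP buffer
              */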
4932         tmp = RREG32(CP_HPD_EOP_CONTROL);
4933         tmp &= ~EOP_SIZE_MASK;
4934         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4935         WREG32(CP_HPD_EOP_CONTROL, tmp);
4936
4937         mutex_unlock(&rdev->srbm_mutex);
4938
4939         /* init the queues.  Just two for now. */
4940         for (i = 0; i < 2; i++) {
4941                 if (i == 0)
4942                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4943                 else
4944                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4945
4946                 if (rdev->ring[idx].mqd_obj == NULL) {
4947                         r = radeon_bo_create(rdev,
4948                                              sizeof(struct bonaire_mqd),
4949                                              PAGE_SIZE, true,
4950                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4951                                              NULL, &rdev->ring[idx].mqd_obj);
4952                         if (r) {
4953                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4954                                 return r;
4955                         }
4956                 }
4957
4958                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4959                 if (unlikely(r != 0)) {
4960                         cik_cp_compute_fini(rdev);
4961                         return r;
4962                 }
4963                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4964                                   &mqd_gpu_addr);
4965                 if (r) {
4966                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4967                         cik_cp_compute_fini(rdev);
4968                         return r;
4969                 }
4970                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4971                 if (r) {
4972                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4973                         cik_cp_compute_fini(rdev);
4974                         return r;
4975                 }
4976
4977                 /* init the mqd struct */
4978                 memset(buf, 0, sizeof(struct bonaire_mqd));
4979
4980                 mqd = (struct bonaire_mqd *)buf;
4981                 mqd->header = 0xC0310800;
4982                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4983                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4984                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4985                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4986
4987                 mutex_lock(&rdev->srbm_mutex);
4988                 cik_srbm_select(rdev, rdev->ring[idx].me,
4989                                 rdev->ring[idx].pipe,
4990                                 rdev->ring[idx].queue, 0);
4991
4992                 /* disable wptr polling */
4993                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4994                 tmp &= ~WPTR_POLL_EN;
4995                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4996
4997                 /* enable doorbell? */
4998                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4999                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5000                 if (use_doorbell)
5001                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5002                 else
5003                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5004                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5005                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5006
5007                 /* disable the queue if it's active */
5008                 mqd->queue_state.cp_hqd_dequeue_request = 0;
5009                 mqd->queue_state.cp_hqd_pq_rptr = 0;
5010                 mqd->queue_state.cp_hqd_pq_wptr = 0;
5011                 if (RREG32(CP_HQD_ACTIVE) & 1) {
5012                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5013                         for (j = 0; j < rdev->usec_timeout; j++) {
5014                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
5015                                         break;
5016                                 udelay(1);
5017                         }
5018                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5019                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5020                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5021                 }
5022
5023                 /* set the pointer to the MQD */
5024                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5025                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5026                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5027                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5028                 /* set MQD vmid to 0 */
5029                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5030                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5031                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5032
5033                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5034                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5035                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5036                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5037                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5038                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5039
5040                 /* set up the HQD, this is similar to CP_RB0_CNTL */
5041                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5042                 mqd->queue_state.cp_hqd_pq_control &=
5043                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5044
5045                 mqd->queue_state.cp_hqd_pq_control |=
5046                         order_base_2(rdev->ring[idx].ring_size / 8);
5047                 mqd->queue_state.cp_hqd_pq_control |=
5048                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
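                     /* both fields use the same 2^(n+1)-dword encoding as EOP_SIZE
                      * above: the queue size covers ring_size bytes and
                      * RPTR_BLOCK_SIZE one RADEON_GPU_PAGE_SIZE (4K) block
                      */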
5049 #ifdef __BIG_ENDIAN
5050                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5051 #endif
5052                 mqd->queue_state.cp_hqd_pq_control &=
5053                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5054                 mqd->queue_state.cp_hqd_pq_control |=
5055                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5056                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5057
5058                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5059                 if (i == 0)
5060                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5061                 else
5062                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5063                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5064                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5065                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5066                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5067                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5068
5069                 /* set the wb address whether it's enabled or not */
5070                 if (i == 0)
5071                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5072                 else
5073                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5074                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5075                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5076                         upper_32_bits(wb_gpu_addr) & 0xffff;
5077                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5078                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5079                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5080                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5081
5082                 /* enable the doorbell if requested */
5083                 if (use_doorbell) {
5084                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5085                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5086                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5087                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5088                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5089                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5090                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5091                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5092
5093                 } else {
5094                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5095                 }
5096                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5097                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5098
5099                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5100                 rdev->ring[idx].wptr = 0;
5101                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5102                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5103                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5104
5105                 /* set the vmid for the queue */
5106                 mqd->queue_state.cp_hqd_vmid = 0;
5107                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5108
5109                 /* activate the queue */
5110                 mqd->queue_state.cp_hqd_active = 1;
5111                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5112
5113                 cik_srbm_select(rdev, 0, 0, 0, 0);
5114                 mutex_unlock(&rdev->srbm_mutex);
5115
5116                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5117                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5118
5119                 rdev->ring[idx].ready = true;
5120                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5121                 if (r)
5122                         rdev->ring[idx].ready = false;
5123         }
5124
5125         return 0;
5126 }
5127
5128 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5129 {
5130         cik_cp_gfx_enable(rdev, enable);
5131         cik_cp_compute_enable(rdev, enable);
5132 }
5133
5134 static int cik_cp_load_microcode(struct radeon_device *rdev)
5135 {
5136         int r;
5137
5138         r = cik_cp_gfx_load_microcode(rdev);
5139         if (r)
5140                 return r;
5141         r = cik_cp_compute_load_microcode(rdev);
5142         if (r)
5143                 return r;
5144
5145         return 0;
5146 }
5147
5148 static void cik_cp_fini(struct radeon_device *rdev)
5149 {
5150         cik_cp_gfx_fini(rdev);
5151         cik_cp_compute_fini(rdev);
5152 }
5153
5154 static int cik_cp_resume(struct radeon_device *rdev)
5155 {
5156         int r;
5157
5158         cik_enable_gui_idle_interrupt(rdev, false);
5159
5160         r = cik_cp_load_microcode(rdev);
5161         if (r)
5162                 return r;
5163
5164         r = cik_cp_gfx_resume(rdev);
5165         if (r)
5166                 return r;
5167         r = cik_cp_compute_resume(rdev);
5168         if (r)
5169                 return r;
5170
5171         cik_enable_gui_idle_interrupt(rdev, true);
5172
5173         return 0;
5174 }
5175
5176 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5177 {
5178         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5179                 RREG32(GRBM_STATUS));
5180         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5181                 RREG32(GRBM_STATUS2));
5182         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5183                 RREG32(GRBM_STATUS_SE0));
5184         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5185                 RREG32(GRBM_STATUS_SE1));
5186         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5187                 RREG32(GRBM_STATUS_SE2));
5188         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5189                 RREG32(GRBM_STATUS_SE3));
5190         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5191                 RREG32(SRBM_STATUS));
5192         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5193                 RREG32(SRBM_STATUS2));
5194         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5195                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5196         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5197                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5198         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5199         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5200                  RREG32(CP_STALLED_STAT1));
5201         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5202                  RREG32(CP_STALLED_STAT2));
5203         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5204                  RREG32(CP_STALLED_STAT3));
5205         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5206                  RREG32(CP_CPF_BUSY_STAT));
5207         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5208                  RREG32(CP_CPF_STALLED_STAT1));
5209         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5210         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5211         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5212                  RREG32(CP_CPC_STALLED_STAT1));
5213         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5214 }
5215
5216 /**
5217  * cik_gpu_check_soft_reset - check which blocks are busy
5218  *
5219  * @rdev: radeon_device pointer
5220  *
5221  * Check which blocks are busy and return the relevant reset
5222  * mask to be used by cik_gpu_soft_reset().
5223  * Returns a mask of the blocks to be reset.
5224  */
5225 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5226 {
5227         u32 reset_mask = 0;
5228         u32 tmp;
5229
5230         /* GRBM_STATUS */
5231         tmp = RREG32(GRBM_STATUS);
5232         if (tmp & (PA_BUSY | SC_BUSY |
5233                    BCI_BUSY | SX_BUSY |
5234                    TA_BUSY | VGT_BUSY |
5235                    DB_BUSY | CB_BUSY |
5236                    GDS_BUSY | SPI_BUSY |
5237                    IA_BUSY | IA_BUSY_NO_DMA))
5238                 reset_mask |= RADEON_RESET_GFX;
5239
5240         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5241                 reset_mask |= RADEON_RESET_CP;
5242
5243         /* GRBM_STATUS2 */
5244         tmp = RREG32(GRBM_STATUS2);
5245         if (tmp & RLC_BUSY)
5246                 reset_mask |= RADEON_RESET_RLC;
5247
5248         /* SDMA0_STATUS_REG */
5249         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5250         if (!(tmp & SDMA_IDLE))
5251                 reset_mask |= RADEON_RESET_DMA;
5252
5253         /* SDMA1_STATUS_REG */
5254         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5255         if (!(tmp & SDMA_IDLE))
5256                 reset_mask |= RADEON_RESET_DMA1;
5257
5258         /* SRBM_STATUS2 */
5259         tmp = RREG32(SRBM_STATUS2);
5260         if (tmp & SDMA_BUSY)
5261                 reset_mask |= RADEON_RESET_DMA;
5262
5263         if (tmp & SDMA1_BUSY)
5264                 reset_mask |= RADEON_RESET_DMA1;
5265
5266         /* SRBM_STATUS */
5267         tmp = RREG32(SRBM_STATUS);
5268
5269         if (tmp & IH_BUSY)
5270                 reset_mask |= RADEON_RESET_IH;
5271
5272         if (tmp & SEM_BUSY)
5273                 reset_mask |= RADEON_RESET_SEM;
5274
5275         if (tmp & GRBM_RQ_PENDING)
5276                 reset_mask |= RADEON_RESET_GRBM;
5277
5278         if (tmp & VMC_BUSY)
5279                 reset_mask |= RADEON_RESET_VMC;
5280
5281         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5282                    MCC_BUSY | MCD_BUSY))
5283                 reset_mask |= RADEON_RESET_MC;
5284
5285         if (evergreen_is_display_hung(rdev))
5286                 reset_mask |= RADEON_RESET_DISPLAY;
5287
5288         /* Skip MC reset as it's most likely not hung, just busy */
5289         if (reset_mask & RADEON_RESET_MC) {
5290                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5291                 reset_mask &= ~RADEON_RESET_MC;
5292         }
5293
5294         return reset_mask;
5295 }
5296
5297 /**
5298  * cik_gpu_soft_reset - soft reset GPU
5299  *
5300  * @rdev: radeon_device pointer
5301  * @reset_mask: mask of which blocks to reset
5302  *
5303  * Soft reset the blocks specified in @reset_mask.
5304  */
5305 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5306 {
5307         struct evergreen_mc_save save;
5308         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5309         u32 tmp;
5310
5311         if (reset_mask == 0)
5312                 return;
5313
5314         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5315
5316         cik_print_gpu_status_regs(rdev);
5317         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5318                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5319         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5320                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5321
5322         /* disable CG/PG */
5323         cik_fini_pg(rdev);
5324         cik_fini_cg(rdev);
5325
5326         /* stop the rlc */
5327         cik_rlc_stop(rdev);
5328
5329         /* Disable GFX parsing/prefetching */
5330         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5331
5332         /* Disable MEC parsing/prefetching */
5333         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5334
5335         if (reset_mask & RADEON_RESET_DMA) {
5336                 /* sdma0 */
5337                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5338                 tmp |= SDMA_HALT;
5339                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5340         }
5341         if (reset_mask & RADEON_RESET_DMA1) {
5342                 /* sdma1 */
5343                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5344                 tmp |= SDMA_HALT;
5345                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5346         }
5347
5348         evergreen_mc_stop(rdev, &save);
5349         if (evergreen_mc_wait_for_idle(rdev)) {
5350                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5351         }
5352
5353         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5354                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5355
5356         if (reset_mask & RADEON_RESET_CP) {
5357                 grbm_soft_reset |= SOFT_RESET_CP;
5358
5359                 srbm_soft_reset |= SOFT_RESET_GRBM;
5360         }
5361
5362         if (reset_mask & RADEON_RESET_DMA)
5363                 srbm_soft_reset |= SOFT_RESET_SDMA;
5364
5365         if (reset_mask & RADEON_RESET_DMA1)
5366                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5367
5368         if (reset_mask & RADEON_RESET_DISPLAY)
5369                 srbm_soft_reset |= SOFT_RESET_DC;
5370
5371         if (reset_mask & RADEON_RESET_RLC)
5372                 grbm_soft_reset |= SOFT_RESET_RLC;
5373
5374         if (reset_mask & RADEON_RESET_SEM)
5375                 srbm_soft_reset |= SOFT_RESET_SEM;
5376
5377         if (reset_mask & RADEON_RESET_IH)
5378                 srbm_soft_reset |= SOFT_RESET_IH;
5379
5380         if (reset_mask & RADEON_RESET_GRBM)
5381                 srbm_soft_reset |= SOFT_RESET_GRBM;
5382
5383         if (reset_mask & RADEON_RESET_VMC)
5384                 srbm_soft_reset |= SOFT_RESET_VMC;
5385
5386         if (!(rdev->flags & RADEON_IS_IGP)) {
5387                 if (reset_mask & RADEON_RESET_MC)
5388                         srbm_soft_reset |= SOFT_RESET_MC;
5389         }
5390
5391         if (grbm_soft_reset) {
5392                 tmp = RREG32(GRBM_SOFT_RESET);
5393                 tmp |= grbm_soft_reset;
5394                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5395                 WREG32(GRBM_SOFT_RESET, tmp);
5396                 tmp = RREG32(GRBM_SOFT_RESET);
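                     /* read back to post the write before the delay */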
5397
5398                 udelay(50);
5399
5400                 tmp &= ~grbm_soft_reset;
5401                 WREG32(GRBM_SOFT_RESET, tmp);
5402                 tmp = RREG32(GRBM_SOFT_RESET);
5403         }
5404
5405         if (srbm_soft_reset) {
5406                 tmp = RREG32(SRBM_SOFT_RESET);
5407                 tmp |= srbm_soft_reset;
5408                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5409                 WREG32(SRBM_SOFT_RESET, tmp);
5410                 tmp = RREG32(SRBM_SOFT_RESET);
5411
5412                 udelay(50);
5413
5414                 tmp &= ~srbm_soft_reset;
5415                 WREG32(SRBM_SOFT_RESET, tmp);
5416                 tmp = RREG32(SRBM_SOFT_RESET);
5417         }
5418
5419         /* Wait a little for things to settle down */
5420         udelay(50);
5421
5422         evergreen_mc_resume(rdev, &save);
5423         udelay(50);
5424
5425         cik_print_gpu_status_regs(rdev);
5426 }
5427
5428 struct kv_reset_save_regs {
5429         u32 gmcon_reng_execute;
5430         u32 gmcon_misc;
5431         u32 gmcon_misc3;
5432 };
5433
5434 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5435                                    struct kv_reset_save_regs *save)
5436 {
5437         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5438         save->gmcon_misc = RREG32(GMCON_MISC);
5439         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5440
5441         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5442         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5443                                                 STCTRL_STUTTER_EN));
5444 }
5445
5446 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5447                                       struct kv_reset_save_regs *save)
5448 {
5449         int i;
5450
5451         WREG32(GMCON_PGFSM_WRITE, 0);
5452         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5453
5454         for (i = 0; i < 5; i++)
5455                 WREG32(GMCON_PGFSM_WRITE, 0);
5456
5457         WREG32(GMCON_PGFSM_WRITE, 0);
5458         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5459
5460         for (i = 0; i < 5; i++)
5461                 WREG32(GMCON_PGFSM_WRITE, 0);
5462
5463         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5464         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5465
5466         for (i = 0; i < 5; i++)
5467                 WREG32(GMCON_PGFSM_WRITE, 0);
5468
5469         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5470         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5471
5472         for (i = 0; i < 5; i++)
5473                 WREG32(GMCON_PGFSM_WRITE, 0);
5474
5475         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5476         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5477
5478         for (i = 0; i < 5; i++)
5479                 WREG32(GMCON_PGFSM_WRITE, 0);
5480
5481         WREG32(GMCON_PGFSM_WRITE, 0);
5482         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5483
5484         for (i = 0; i < 5; i++)
5485                 WREG32(GMCON_PGFSM_WRITE, 0);
5486
5487         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5488         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5489
5490         for (i = 0; i < 5; i++)
5491                 WREG32(GMCON_PGFSM_WRITE, 0);
5492
5493         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5494         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5495
5496         for (i = 0; i < 5; i++)
5497                 WREG32(GMCON_PGFSM_WRITE, 0);
5498
5499         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5500         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5501
5502         for (i = 0; i < 5; i++)
5503                 WREG32(GMCON_PGFSM_WRITE, 0);
5504
5505         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5506         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5507
5508         for (i = 0; i < 5; i++)
5509                 WREG32(GMCON_PGFSM_WRITE, 0);
5510
5511         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5512         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5513
5514         WREG32(GMCON_MISC3, save->gmcon_misc3);
5515         WREG32(GMCON_MISC, save->gmcon_misc);
5516         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5517 }
5518
5519 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5520 {
5521         struct evergreen_mc_save save;
5522         struct kv_reset_save_regs kv_save = { 0 };
5523         u32 tmp, i;
5524
5525         dev_info(rdev->dev, "GPU pci config reset\n");
5526
5527         /* disable dpm? */
5528
5529         /* disable cg/pg */
5530         cik_fini_pg(rdev);
5531         cik_fini_cg(rdev);
5532
5533         /* Disable GFX parsing/prefetching */
5534         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5535
5536         /* Disable MEC parsing/prefetching */
5537         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5538
5539         /* sdma0 */
5540         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5541         tmp |= SDMA_HALT;
5542         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5543         /* sdma1 */
5544         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5545         tmp |= SDMA_HALT;
5546         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5547         /* XXX other engines? */
5548
5549         /* halt the rlc, disable cp internal ints */
5550         cik_rlc_stop(rdev);
5551
5552         udelay(50);
5553
5554         /* disable mem access */
5555         evergreen_mc_stop(rdev, &save);
5556         if (evergreen_mc_wait_for_idle(rdev)) {
5557                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5558         }
5559
5560         if (rdev->flags & RADEON_IS_IGP)
5561                 kv_save_regs_for_reset(rdev, &kv_save);
5562
5563         /* disable BM */
5564         pci_clear_master(rdev->pdev);
5565         /* reset */
5566         radeon_pci_config_reset(rdev);
5567
5568         udelay(100);
5569
5570         /* wait for asic to come out of reset */
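             /* CONFIG_MEMSIZE reads back all ones while the chip is still in
              * reset, so a sane value means register access is working again
              */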
5571         for (i = 0; i < rdev->usec_timeout; i++) {
5572                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5573                         break;
5574                 udelay(1);
5575         }
5576
5577         /* does asic init need to be run first??? */
5578         if (rdev->flags & RADEON_IS_IGP)
5579                 kv_restore_regs_for_reset(rdev, &kv_save);
5580 }
5581
5582 /**
5583  * cik_asic_reset - soft reset GPU
5584  *
5585  * @rdev: radeon_device pointer
5586  *
5587  * Look up which blocks are hung and attempt
5588  * to reset them.
5589  * Returns 0 for success.
5590  */
5591 int cik_asic_reset(struct radeon_device *rdev)
5592 {
5593         u32 reset_mask;
5594
5595         reset_mask = cik_gpu_check_soft_reset(rdev);
5596
5597         if (reset_mask)
5598                 r600_set_bios_scratch_engine_hung(rdev, true);
5599
5600         /* try soft reset */
5601         cik_gpu_soft_reset(rdev, reset_mask);
5602
5603         reset_mask = cik_gpu_check_soft_reset(rdev);
5604
5605         /* try pci config reset */
5606         if (reset_mask && radeon_hard_reset)
5607                 cik_gpu_pci_config_reset(rdev);
5608
5609         reset_mask = cik_gpu_check_soft_reset(rdev);
5610
5611         if (!reset_mask)
5612                 r600_set_bios_scratch_engine_hung(rdev, false);
5613
5614         return 0;
5615 }
5616
5617 /**
5618  * cik_gfx_is_lockup - check if the 3D engine is locked up
5619  *
5620  * @rdev: radeon_device pointer
5621  * @ring: radeon_ring structure holding ring information
5622  *
5623  * Check if the 3D engine is locked up (CIK).
5624  * Returns true if the engine is locked, false if not.
5625  */
5626 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5627 {
5628         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5629
5630         if (!(reset_mask & (RADEON_RESET_GFX |
5631                             RADEON_RESET_COMPUTE |
5632                             RADEON_RESET_CP))) {
5633                 radeon_ring_lockup_update(rdev, ring);
5634                 return false;
5635         }
5636         return radeon_ring_test_lockup(rdev, ring);
5637 }
5638
5639 /* MC */
5640 /**
5641  * cik_mc_program - program the GPU memory controller
5642  *
5643  * @rdev: radeon_device pointer
5644  *
5645  * Set the location of vram, gart, and AGP in the GPU's
5646  * physical address space (CIK).
5647  */
5648 static void cik_mc_program(struct radeon_device *rdev)
5649 {
5650         struct evergreen_mc_save save;
5651         u32 tmp;
5652         int i, j;
5653
5654         /* Initialize HDP */
5655         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5656                 WREG32((0x2c14 + j), 0x00000000);
5657                 WREG32((0x2c18 + j), 0x00000000);
5658                 WREG32((0x2c1c + j), 0x00000000);
5659                 WREG32((0x2c20 + j), 0x00000000);
5660                 WREG32((0x2c24 + j), 0x00000000);
5661         }
5662         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5663
5664         evergreen_mc_stop(rdev, &save);
5665         if (radeon_mc_wait_for_idle(rdev)) {
5666                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5667         }
5668         /* Lockout access through VGA aperture*/
5669         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5670         /* Update configuration */
5671         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5672                rdev->mc.vram_start >> 12);
5673         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5674                rdev->mc.vram_end >> 12);
5675         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5676                rdev->vram_scratch.gpu_addr >> 12);
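             /* MC_VM_FB_LOCATION packs the VRAM top (bits 31:16) and base
              * (bits 15:0) in 16MB units, hence the >> 24
              */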
5677         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5678         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5679         WREG32(MC_VM_FB_LOCATION, tmp);
5680         /* XXX double check these! */
5681         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5682         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5683         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5684         WREG32(MC_VM_AGP_BASE, 0);
5685         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5686         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5687         if (radeon_mc_wait_for_idle(rdev)) {
5688                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5689         }
5690         evergreen_mc_resume(rdev, &save);
5691         /* we need to own VRAM, so turn off the VGA renderer here
5692          * to stop it overwriting our objects */
5693         rv515_vga_render_disable(rdev);
5694 }
5695
5696 /**
5697  * cik_mc_init - initialize the memory controller driver params
5698  *
5699  * @rdev: radeon_device pointer
5700  *
5701  * Look up the amount of vram, vram width, and decide how to place
5702  * vram and gart within the GPU's physical address space (CIK).
5703  * Returns 0 for success.
5704  */
5705 static int cik_mc_init(struct radeon_device *rdev)
5706 {
5707         u32 tmp;
5708         int chansize, numchan;
5709
5710         /* Get VRAM information */
5711         rdev->mc.vram_is_ddr = true;
5712         tmp = RREG32(MC_ARB_RAMCFG);
5713         if (tmp & CHANSIZE_MASK) {
5714                 chansize = 64;
5715         } else {
5716                 chansize = 32;
5717         }
5718         tmp = RREG32(MC_SHARED_CHMAP);
5719         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5720         case 0:
5721         default:
5722                 numchan = 1;
5723                 break;
5724         case 1:
5725                 numchan = 2;
5726                 break;
5727         case 2:
5728                 numchan = 4;
5729                 break;
5730         case 3:
5731                 numchan = 8;
5732                 break;
5733         case 4:
5734                 numchan = 3;
5735                 break;
5736         case 5:
5737                 numchan = 6;
5738                 break;
5739         case 6:
5740                 numchan = 10;
5741                 break;
5742         case 7:
5743                 numchan = 12;
5744                 break;
5745         case 8:
5746                 numchan = 16;
5747                 break;
5748         }
5749         rdev->mc.vram_width = numchan * chansize;
5750         /* Could aperture size report 0? */
5751         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5752         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5753         /* CONFIG_MEMSIZE reports the VRAM size in MB, as on SI */
5754         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5755         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5756         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5757         si_vram_gtt_location(rdev, &rdev->mc);
5758         radeon_update_bandwidth_info(rdev);
5759
5760         return 0;
5761 }
5762
5763 /*
5764  * GART
5765  * VMID 0 is the physical GPU addresses as used by the kernel.
5766  * VMIDs 1-15 are used for userspace clients and are handled
5767  * by the radeon vm/hsa code.
5768  */
5769 /**
5770  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5771  *
5772  * @rdev: radeon_device pointer
5773  *
5774  * Flush the TLB for the VMID 0 page table (CIK).
5775  */
5776 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5777 {
5778         /* flush hdp cache */
5779         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5780
5781         /* bits 0-15 are the VM contexts 0-15 */
5782         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5783 }
5784
5785 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5786 {
5787         int i;
5788         uint32_t sh_mem_bases, sh_mem_config;
5789
5790         sh_mem_bases = 0x6000 | 0x6000 << 16;
5791         sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5792         sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5793
5794         mutex_lock(&rdev->srbm_mutex);
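             /* VMIDs 8-15 are reserved for amdkfd (see cik_vm_init()), so give
              * them their own SH_MEM aperture setup here
              */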
5795         for (i = 8; i < 16; i++) {
5796                 cik_srbm_select(rdev, 0, 0, 0, i);
5797                 /* CP and shaders */
5798                 WREG32(SH_MEM_CONFIG, sh_mem_config);
5799                 WREG32(SH_MEM_APE1_BASE, 1);
5800                 WREG32(SH_MEM_APE1_LIMIT, 0);
5801                 WREG32(SH_MEM_BASES, sh_mem_bases);
5802         }
5803         cik_srbm_select(rdev, 0, 0, 0, 0);
5804         mutex_unlock(&rdev->srbm_mutex);
5805 }
5806
5807 /**
5808  * cik_pcie_gart_enable - gart enable
5809  *
5810  * @rdev: radeon_device pointer
5811  *
5812  * This sets up the TLBs, programs the page tables for VMID0,
5813  * sets up the hw for VMIDs 1-15 which are allocated on
5814  * demand, and sets up the global locations for the LDS, GDS,
5815  * and GPUVM for FSA64 clients (CIK).
5816  * Returns 0 for success, errors for failure.
5817  */
5818 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5819 {
5820         int r, i;
5821
5822         if (rdev->gart.robj == NULL) {
5823                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5824                 return -EINVAL;
5825         }
5826         r = radeon_gart_table_vram_pin(rdev);
5827         if (r)
5828                 return r;
5829         /* Setup TLB control */
5830         WREG32(MC_VM_MX_L1_TLB_CNTL,
5831                (0xA << 7) |
5832                ENABLE_L1_TLB |
5833                ENABLE_L1_FRAGMENT_PROCESSING |
5834                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5835                ENABLE_ADVANCED_DRIVER_MODEL |
5836                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5837         /* Setup L2 cache */
5838         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5839                ENABLE_L2_FRAGMENT_PROCESSING |
5840                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5841                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5842                EFFECTIVE_L2_QUEUE_SIZE(7) |
5843                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5844         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5845         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5846                BANK_SELECT(4) |
5847                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5848         /* setup context0 */
5849         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5850         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5851         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5852         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5853                         (u32)(rdev->dummy_page.addr >> 12));
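             /* faulting accesses in this context are redirected to the dummy page */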
5854         WREG32(VM_CONTEXT0_CNTL2, 0);
5855         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5856                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5857
5858         WREG32(0x15D4, 0);
5859         WREG32(0x15D8, 0);
5860         WREG32(0x15DC, 0);
5861
5862         /* restore context1-15 */
5863         /* set vm size, must be a multiple of 4 */
5864         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5865         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5866         for (i = 1; i < 16; i++) {
5867                 if (i < 8)
5868                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5869                                rdev->vm_manager.saved_table_addr[i]);
5870                 else
5871                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5872                                rdev->vm_manager.saved_table_addr[i]);
5873         }
5874
5875         /* enable context1-15 */
5876         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5877                (u32)(rdev->dummy_page.addr >> 12));
5878         WREG32(VM_CONTEXT1_CNTL2, 4);
5879         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5880                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5881                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5882                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5883                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5884                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5885                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5886                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5887                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5888                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5889                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5890                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5891                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5892                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5893
5894         if (rdev->family == CHIP_KAVERI) {
5895                 u32 tmp = RREG32(CHUB_CONTROL);
5896                 tmp &= ~BYPASS_VM;
5897                 WREG32(CHUB_CONTROL, tmp);
5898         }
5899
5900         /* XXX SH_MEM regs */
5901         /* where to put LDS, scratch, GPUVM in FSA64 space */
5902         mutex_lock(&rdev->srbm_mutex);
5903         for (i = 0; i < 16; i++) {
5904                 cik_srbm_select(rdev, 0, 0, 0, i);
5905                 /* CP and shaders */
5906                 WREG32(SH_MEM_CONFIG, 0);
5907                 WREG32(SH_MEM_APE1_BASE, 1);
5908                 WREG32(SH_MEM_APE1_LIMIT, 0);
5909                 WREG32(SH_MEM_BASES, 0);
5910                 /* SDMA GFX */
5911                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5912                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5913                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5914                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5915                 /* XXX SDMA RLC - todo */
5916         }
5917         cik_srbm_select(rdev, 0, 0, 0, 0);
5918         mutex_unlock(&rdev->srbm_mutex);
5919
5920         cik_pcie_init_compute_vmid(rdev);
5921
5922         cik_pcie_gart_tlb_flush(rdev);
5923         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5924                  (unsigned)(rdev->mc.gtt_size >> 20),
5925                  (unsigned long long)rdev->gart.table_addr);
5926         rdev->gart.ready = true;
5927         return 0;
5928 }
5929
5930 /**
5931  * cik_pcie_gart_disable - gart disable
5932  *
5933  * @rdev: radeon_device pointer
5934  *
5935  * This disables all VM page tables (CIK).
5936  */
5937 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5938 {
5939         unsigned i;
5940
5941         for (i = 1; i < 16; ++i) {
5942                 uint32_t reg;
5943                 if (i < 8)
5944                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5945                 else
5946                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5947                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5948         }
5949
5950         /* Disable all tables */
5951         WREG32(VM_CONTEXT0_CNTL, 0);
5952         WREG32(VM_CONTEXT1_CNTL, 0);
5953         /* Setup TLB control */
5954         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5955                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5956         /* Setup L2 cache */
5957         WREG32(VM_L2_CNTL,
5958                ENABLE_L2_FRAGMENT_PROCESSING |
5959                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5960                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5961                EFFECTIVE_L2_QUEUE_SIZE(7) |
5962                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5963         WREG32(VM_L2_CNTL2, 0);
5964         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5965                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5966         radeon_gart_table_vram_unpin(rdev);
5967 }
5968
5969 /**
5970  * cik_pcie_gart_fini - vm fini callback
5971  *
5972  * @rdev: radeon_device pointer
5973  *
5974  * Tears down the driver GART/VM setup (CIK).
5975  */
5976 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5977 {
5978         cik_pcie_gart_disable(rdev);
5979         radeon_gart_table_vram_free(rdev);
5980         radeon_gart_fini(rdev);
5981 }
5982
5983 /* vm parser */
5984 /**
5985  * cik_ib_parse - vm ib_parse callback
5986  *
5987  * @rdev: radeon_device pointer
5988  * @ib: indirect buffer pointer
5989  *
5990  * CIK uses hw IB checking so this is a nop (CIK).
5991  */
5992 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5993 {
5994         return 0;
5995 }
5996
5997 /*
5998  * vm
5999  * VMID 0 is the physical GPU addresses as used by the kernel.
6000  * VMIDs 1-15 are used for userspace clients and are handled
6001  * by the radeon vm/hsa code.
6002  */
6003 /**
6004  * cik_vm_init - cik vm init callback
6005  *
6006  * @rdev: radeon_device pointer
6007  *
6008  * Inits cik specific vm parameters (number of VMs, base of vram for
6009  * VMIDs 1-15) (CIK).
6010  * Returns 0 for success.
6011  */
6012 int cik_vm_init(struct radeon_device *rdev)
6013 {
6014         /*
6015          * number of VMs
6016          * VMID 0 is reserved for System
6017          * radeon graphics/compute will use VMIDs 1-7
6018          * amdkfd will use VMIDs 8-15
6019          */
6020         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6021         /* base offset of vram pages */
6022         if (rdev->flags & RADEON_IS_IGP) {
6023                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
6024                 tmp <<= 22;
6025                 rdev->vm_manager.vram_base_offset = tmp;
6026         } else
6027                 rdev->vm_manager.vram_base_offset = 0;
6028
6029         return 0;
6030 }
6031
6032 /**
6033  * cik_vm_fini - cik vm fini callback
6034  *
6035  * @rdev: radeon_device pointer
6036  *
6037  * Tear down any asic specific VM setup (CIK).
6038  */
6039 void cik_vm_fini(struct radeon_device *rdev)
6040 {
6041 }
6042
6043 /**
6044  * cik_vm_decode_fault - print human readable fault info
6045  *
6046  * @rdev: radeon_device pointer
6047  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6048  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
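      * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value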
6049  *
6050  * Print human readable fault information (CIK).
6051  */
6052 static void cik_vm_decode_fault(struct radeon_device *rdev,
6053                                 u32 status, u32 addr, u32 mc_client)
6054 {
6055         u32 mc_id;
6056         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6057         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6058         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6059                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6060
6061         if (rdev->family == CHIP_HAWAII)
6062                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6063         else
6064                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6065
6066         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6067                protections, vmid, addr,
6068                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6069                block, mc_client, mc_id);
6070 }
6071
6072 /**
6073  * cik_vm_flush - cik vm flush using the CP
6074  *
6075  * @rdev: radeon_device pointer
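      * @ring: radeon_ring structure holding ring information
      * @vm_id: VM id to flush
      * @pd_addr: page directory base address for this VM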
6076  *
6077  * Update the page table base and flush the VM TLB
6078  * using the CP (CIK).
6079  */
6080 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6081                   unsigned vm_id, uint64_t pd_addr)
6082 {
6083         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6084
6085         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6086         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6087                                  WRITE_DATA_DST_SEL(0)));
6088         if (vm_id < 8) {
6089                 radeon_ring_write(ring,
6090                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6091         } else {
6092                 radeon_ring_write(ring,
6093                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6094         }
6095         radeon_ring_write(ring, 0);
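             /* the page table base registers take a 4K-aligned address, hence pd_addr >> 12 */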
6096         radeon_ring_write(ring, pd_addr >> 12);
6097
6098         /* update SH_MEM_* regs */
6099         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6100         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6101                                  WRITE_DATA_DST_SEL(0)));
6102         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6103         radeon_ring_write(ring, 0);
6104         radeon_ring_write(ring, VMID(vm_id));
6105
6106         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6107         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6108                                  WRITE_DATA_DST_SEL(0)));
6109         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6110         radeon_ring_write(ring, 0);
6111
6112         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6113         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6114         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6115         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6116
6117         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6118         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6119                                  WRITE_DATA_DST_SEL(0)));
6120         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6121         radeon_ring_write(ring, 0);
6122         radeon_ring_write(ring, VMID(0));
6123
6124         /* HDP flush */
6125         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6126
6127         /* bits 0-15 are the VM contexts 0-15 */
6128         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6129         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6130                                  WRITE_DATA_DST_SEL(0)));
6131         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6132         radeon_ring_write(ring, 0);
6133         radeon_ring_write(ring, 1 << vm_id);
6134
6135         /* wait for the invalidate to complete */
6136         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6137         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6138                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6139                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6140         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6141         radeon_ring_write(ring, 0);
6142         radeon_ring_write(ring, 0); /* ref */
6143         radeon_ring_write(ring, 0); /* mask */
6144         radeon_ring_write(ring, 0x20); /* poll interval */
6145
6146         /* compute doesn't have PFP */
6147         if (usepfp) {
6148                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6149                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6150                 radeon_ring_write(ring, 0x0);
6151         }
6152 }
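
/*
 * Illustrative sketch only (not part of the driver): the flush above is
 * normally reached through the asic dispatch table during VM updates, but a
 * hand-rolled emission on the gfx ring would look roughly like this.  The
 * dword reservation (64) is an assumption, not a value taken from this file.
 */
#if 0
static int cik_vm_flush_example(struct radeon_device *rdev,
				unsigned vm_id, uint64_t pd_addr)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* reserve space for the WRITE_DATA/WAIT_REG_MEM packets emitted above */
	r = radeon_ring_lock(rdev, ring, 64);
	if (r)
		return r;
	cik_vm_flush(rdev, ring, vm_id, pd_addr);
	radeon_ring_unlock_commit(rdev, ring, false);
	return 0;
}
#endif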
6153
6154 /*
6155  * RLC
6156  * The RLC is a multi-purpose microengine that handles a
6157  * variety of functions, the most important of which is
6158  * the interrupt controller.
6159  */
6160 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6161                                           bool enable)
6162 {
6163         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6164
6165         if (enable)
6166                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6167         else
6168                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6169         WREG32(CP_INT_CNTL_RING0, tmp);
6170 }
6171
6172 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6173 {
6174         u32 tmp;
6175
6176         tmp = RREG32(RLC_LB_CNTL);
6177         if (enable)
6178                 tmp |= LOAD_BALANCE_ENABLE;
6179         else
6180                 tmp &= ~LOAD_BALANCE_ENABLE;
6181         WREG32(RLC_LB_CNTL, tmp);
6182 }
6183
6184 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6185 {
6186         u32 i, j, k;
6187         u32 mask;
6188
6189         mutex_lock(&rdev->grbm_idx_mutex);
6190         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6191                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6192                         cik_select_se_sh(rdev, i, j);
6193                         for (k = 0; k < rdev->usec_timeout; k++) {
6194                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6195                                         break;
6196                                 udelay(1);
6197                         }
6198                 }
6199         }
6200         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6201         mutex_unlock(&rdev->grbm_idx_mutex);
6202
6203         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6204         for (k = 0; k < rdev->usec_timeout; k++) {
6205                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6206                         break;
6207                 udelay(1);
6208         }
6209 }
6210
6211 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6212 {
6213         u32 tmp;
6214
6215         tmp = RREG32(RLC_CNTL);
6216         if (tmp != rlc)
6217                 WREG32(RLC_CNTL, rlc);
6218 }
6219
6220 static u32 cik_halt_rlc(struct radeon_device *rdev)
6221 {
6222         u32 data, orig;
6223
6224         orig = data = RREG32(RLC_CNTL);
6225
6226         if (data & RLC_ENABLE) {
6227                 u32 i;
6228
6229                 data &= ~RLC_ENABLE;
6230                 WREG32(RLC_CNTL, data);
6231
6232                 for (i = 0; i < rdev->usec_timeout; i++) {
6233                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6234                                 break;
6235                         udelay(1);
6236                 }
6237
6238                 cik_wait_for_rlc_serdes(rdev);
6239         }
6240
6241         return orig;
6242 }
6243
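/*
 * cik_enter_rlc_safe_mode()/cik_exit_rlc_safe_mode(): request that the RLC
 * pause its own register accesses (MSG_ENTER_RLC_SAFE_MODE) and wait for the
 * request to be acknowledged, so clock/power gating state can be reprogrammed
 * without racing the microcode; the exit message releases the RLC again.
 */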
6244 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6245 {
6246         u32 tmp, i, mask;
6247
6248         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6249         WREG32(RLC_GPR_REG2, tmp);
6250
6251         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6252         for (i = 0; i < rdev->usec_timeout; i++) {
6253                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6254                         break;
6255                 udelay(1);
6256         }
6257
6258         for (i = 0; i < rdev->usec_timeout; i++) {
6259                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6260                         break;
6261                 udelay(1);
6262         }
6263 }
6264
6265 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6266 {
6267         u32 tmp;
6268
6269         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6270         WREG32(RLC_GPR_REG2, tmp);
6271 }
6272
6273 /**
6274  * cik_rlc_stop - stop the RLC ME
6275  *
6276  * @rdev: radeon_device pointer
6277  *
6278  * Halt the RLC ME (MicroEngine) (CIK).
6279  */
6280 static void cik_rlc_stop(struct radeon_device *rdev)
6281 {
6282         WREG32(RLC_CNTL, 0);
6283
6284         cik_enable_gui_idle_interrupt(rdev, false);
6285
6286         cik_wait_for_rlc_serdes(rdev);
6287 }
6288
6289 /**
6290  * cik_rlc_start - start the RLC ME
6291  *
6292  * @rdev: radeon_device pointer
6293  *
6294  * Unhalt the RLC ME (MicroEngine) (CIK).
6295  */
6296 static void cik_rlc_start(struct radeon_device *rdev)
6297 {
6298         WREG32(RLC_CNTL, RLC_ENABLE);
6299
6300         cik_enable_gui_idle_interrupt(rdev, true);
6301
6302         udelay(50);
6303 }
6304
6305 /**
6306  * cik_rlc_resume - setup the RLC hw
6307  *
6308  * @rdev: radeon_device pointer
6309  *
6310  * Initialize the RLC registers, load the ucode,
6311  * and start the RLC (CIK).
6312  * Returns 0 for success, -EINVAL if the ucode is not available.
6313  */
6314 static int cik_rlc_resume(struct radeon_device *rdev)
6315 {
6316         u32 i, size, tmp;
6317
6318         if (!rdev->rlc_fw)
6319                 return -EINVAL;
6320
6321         cik_rlc_stop(rdev);
6322
6323         /* disable CG */
6324         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6325         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6326
6327         si_rlc_reset(rdev);
6328
6329         cik_init_pg(rdev);
6330
6331         cik_init_cg(rdev);
6332
6333         WREG32(RLC_LB_CNTR_INIT, 0);
6334         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6335
6336         mutex_lock(&rdev->grbm_idx_mutex);
6337         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6338         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6339         WREG32(RLC_LB_PARAMS, 0x00600408);
6340         WREG32(RLC_LB_CNTL, 0x80000004);
6341         mutex_unlock(&rdev->grbm_idx_mutex);
6342
6343         WREG32(RLC_MC_CNTL, 0);
6344         WREG32(RLC_UCODE_CNTL, 0);
6345
6346         if (rdev->new_fw) {
6347                 const struct rlc_firmware_header_v1_0 *hdr =
6348                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6349                 const __le32 *fw_data = (const __le32 *)
6350                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6351
6352                 radeon_ucode_print_rlc_hdr(&hdr->header);
6353
6354                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6355                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6356                 for (i = 0; i < size; i++)
6357                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6358                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6359         } else {
6360                 const __be32 *fw_data;
6361
6362                 switch (rdev->family) {
6363                 case CHIP_BONAIRE:
6364                 case CHIP_HAWAII:
6365                 default:
6366                         size = BONAIRE_RLC_UCODE_SIZE;
6367                         break;
6368                 case CHIP_KAVERI:
6369                         size = KV_RLC_UCODE_SIZE;
6370                         break;
6371                 case CHIP_KABINI:
6372                         size = KB_RLC_UCODE_SIZE;
6373                         break;
6374                 case CHIP_MULLINS:
6375                         size = ML_RLC_UCODE_SIZE;
6376                         break;
6377                 }
6378
6379                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6380                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6381                 for (i = 0; i < size; i++)
6382                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6383                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6384         }
6385
6386         /* XXX - find out what chips support lbpw */
6387         cik_enable_lbpw(rdev, false);
6388
6389         if (rdev->family == CHIP_BONAIRE)
6390                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6391
6392         cik_rlc_start(rdev);
6393
6394         return 0;
6395 }
6396
6397 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6398 {
6399         u32 data, orig, tmp, tmp2;
6400
6401         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6402
6403         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6404                 cik_enable_gui_idle_interrupt(rdev, true);
6405
6406                 tmp = cik_halt_rlc(rdev);
6407
6408                 mutex_lock(&rdev->grbm_idx_mutex);
6409                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6410                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6411                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6412                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6413                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6414                 mutex_unlock(&rdev->grbm_idx_mutex);
6415
6416                 cik_update_rlc(rdev, tmp);
6417
6418                 data |= CGCG_EN | CGLS_EN;
6419         } else {
6420                 cik_enable_gui_idle_interrupt(rdev, false);
6421
6422                 RREG32(CB_CGTT_SCLK_CTRL);
6423                 RREG32(CB_CGTT_SCLK_CTRL);
6424                 RREG32(CB_CGTT_SCLK_CTRL);
6425                 RREG32(CB_CGTT_SCLK_CTRL);
6426
6427                 data &= ~(CGCG_EN | CGLS_EN);
6428         }
6429
6430         if (orig != data)
6431                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6432
6433 }
6434
6435 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6436 {
6437         u32 data, orig, tmp = 0;
6438
6439         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6440                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6441                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6442                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6443                                 data |= CP_MEM_LS_EN;
6444                                 if (orig != data)
6445                                         WREG32(CP_MEM_SLP_CNTL, data);
6446                         }
6447                 }
6448
6449                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6450                 data |= 0x00000001;
6451                 data &= 0xfffffffd;
6452                 if (orig != data)
6453                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6454
6455                 tmp = cik_halt_rlc(rdev);
6456
6457                 mutex_lock(&rdev->grbm_idx_mutex);
6458                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6459                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6460                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6461                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6462                 WREG32(RLC_SERDES_WR_CTRL, data);
6463                 mutex_unlock(&rdev->grbm_idx_mutex);
6464
6465                 cik_update_rlc(rdev, tmp);
6466
6467                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6468                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6469                         data &= ~SM_MODE_MASK;
6470                         data |= SM_MODE(0x2);
6471                         data |= SM_MODE_ENABLE;
6472                         data &= ~CGTS_OVERRIDE;
6473                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6474                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6475                                 data &= ~CGTS_LS_OVERRIDE;
6476                         data &= ~ON_MONITOR_ADD_MASK;
6477                         data |= ON_MONITOR_ADD_EN;
6478                         data |= ON_MONITOR_ADD(0x96);
6479                         if (orig != data)
6480                                 WREG32(CGTS_SM_CTRL_REG, data);
6481                 }
6482         } else {
6483                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6484                 data |= 0x00000003;
6485                 if (orig != data)
6486                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6487
6488                 data = RREG32(RLC_MEM_SLP_CNTL);
6489                 if (data & RLC_MEM_LS_EN) {
6490                         data &= ~RLC_MEM_LS_EN;
6491                         WREG32(RLC_MEM_SLP_CNTL, data);
6492                 }
6493
6494                 data = RREG32(CP_MEM_SLP_CNTL);
6495                 if (data & CP_MEM_LS_EN) {
6496                         data &= ~CP_MEM_LS_EN;
6497                         WREG32(CP_MEM_SLP_CNTL, data);
6498                 }
6499
6500                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6501                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6502                 if (orig != data)
6503                         WREG32(CGTS_SM_CTRL_REG, data);
6504
6505                 tmp = cik_halt_rlc(rdev);
6506
6507                 mutex_lock(&rdev->grbm_idx_mutex);
6508                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6509                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6510                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6511                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6512                 WREG32(RLC_SERDES_WR_CTRL, data);
6513                 mutex_unlock(&rdev->grbm_idx_mutex);
6514
6515                 cik_update_rlc(rdev, tmp);
6516         }
6517 }
6518
6519 static const u32 mc_cg_registers[] =
6520 {
6521         MC_HUB_MISC_HUB_CG,
6522         MC_HUB_MISC_SIP_CG,
6523         MC_HUB_MISC_VM_CG,
6524         MC_XPB_CLK_GAT,
6525         ATC_MISC_CG,
6526         MC_CITF_MISC_WR_CG,
6527         MC_CITF_MISC_RD_CG,
6528         MC_CITF_MISC_VM_CG,
6529         VM_L2_CG,
6530 };
6531
6532 static void cik_enable_mc_ls(struct radeon_device *rdev,
6533                              bool enable)
6534 {
6535         int i;
6536         u32 orig, data;
6537
6538         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6539                 orig = data = RREG32(mc_cg_registers[i]);
6540                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6541                         data |= MC_LS_ENABLE;
6542                 else
6543                         data &= ~MC_LS_ENABLE;
6544                 if (data != orig)
6545                         WREG32(mc_cg_registers[i], data);
6546         }
6547 }
6548
6549 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6550                                bool enable)
6551 {
6552         int i;
6553         u32 orig, data;
6554
6555         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6556                 orig = data = RREG32(mc_cg_registers[i]);
6557                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6558                         data |= MC_CG_ENABLE;
6559                 else
6560                         data &= ~MC_CG_ENABLE;
6561                 if (data != orig)
6562                         WREG32(mc_cg_registers[i], data);
6563         }
6564 }
6565
6566 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6567                                  bool enable)
6568 {
6569         u32 orig, data;
6570
6571         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6572                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6573                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6574         } else {
6575                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6576                 data |= 0xff000000;
6577                 if (data != orig)
6578                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6579
6580                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6581                 data |= 0xff000000;
6582                 if (data != orig)
6583                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6584         }
6585 }
6586
6587 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6588                                  bool enable)
6589 {
6590         u32 orig, data;
6591
6592         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6593                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6594                 data |= 0x100;
6595                 if (orig != data)
6596                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6597
6598                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6599                 data |= 0x100;
6600                 if (orig != data)
6601                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6602         } else {
6603                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6604                 data &= ~0x100;
6605                 if (orig != data)
6606                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6607
6608                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6609                 data &= ~0x100;
6610                 if (orig != data)
6611                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6612         }
6613 }
6614
6615 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6616                                 bool enable)
6617 {
6618         u32 orig, data;
6619
6620         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6621                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6622                 data = 0xfff;
6623                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6624
6625                 orig = data = RREG32(UVD_CGC_CTRL);
6626                 data |= DCM;
6627                 if (orig != data)
6628                         WREG32(UVD_CGC_CTRL, data);
6629         } else {
6630                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6631                 data &= ~0xfff;
6632                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6633
6634                 orig = data = RREG32(UVD_CGC_CTRL);
6635                 data &= ~DCM;
6636                 if (orig != data)
6637                         WREG32(UVD_CGC_CTRL, data);
6638         }
6639 }
6640
6641 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6642                                bool enable)
6643 {
6644         u32 orig, data;
6645
6646         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6647
6648         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6649                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6650                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6651         else
6652                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6653                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6654
6655         if (orig != data)
6656                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6657 }
6658
6659 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6660                                 bool enable)
6661 {
6662         u32 orig, data;
6663
6664         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6665
6666         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6667                 data &= ~CLOCK_GATING_DIS;
6668         else
6669                 data |= CLOCK_GATING_DIS;
6670
6671         if (orig != data)
6672                 WREG32(HDP_HOST_PATH_CNTL, data);
6673 }
6674
6675 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6676                               bool enable)
6677 {
6678         u32 orig, data;
6679
6680         orig = data = RREG32(HDP_MEM_POWER_LS);
6681
6682         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6683                 data |= HDP_LS_ENABLE;
6684         else
6685                 data &= ~HDP_LS_ENABLE;
6686
6687         if (orig != data)
6688                 WREG32(HDP_MEM_POWER_LS, data);
6689 }
6690
6691 void cik_update_cg(struct radeon_device *rdev,
6692                    u32 block, bool enable)
6693 {
6694
6695         if (block & RADEON_CG_BLOCK_GFX) {
6696                 cik_enable_gui_idle_interrupt(rdev, false);
6697                 /* order matters! */
6698                 if (enable) {
6699                         cik_enable_mgcg(rdev, true);
6700                         cik_enable_cgcg(rdev, true);
6701                 } else {
6702                         cik_enable_cgcg(rdev, false);
6703                         cik_enable_mgcg(rdev, false);
6704                 }
6705                 cik_enable_gui_idle_interrupt(rdev, true);
6706         }
6707
6708         if (block & RADEON_CG_BLOCK_MC) {
6709                 if (!(rdev->flags & RADEON_IS_IGP)) {
6710                         cik_enable_mc_mgcg(rdev, enable);
6711                         cik_enable_mc_ls(rdev, enable);
6712                 }
6713         }
6714
6715         if (block & RADEON_CG_BLOCK_SDMA) {
6716                 cik_enable_sdma_mgcg(rdev, enable);
6717                 cik_enable_sdma_mgls(rdev, enable);
6718         }
6719
6720         if (block & RADEON_CG_BLOCK_BIF) {
6721                 cik_enable_bif_mgls(rdev, enable);
6722         }
6723
6724         if (block & RADEON_CG_BLOCK_UVD) {
6725                 if (rdev->has_uvd)
6726                         cik_enable_uvd_mgcg(rdev, enable);
6727         }
6728
6729         if (block & RADEON_CG_BLOCK_HDP) {
6730                 cik_enable_hdp_mgcg(rdev, enable);
6731                 cik_enable_hdp_ls(rdev, enable);
6732         }
6733
6734         if (block & RADEON_CG_BLOCK_VCE) {
6735                 vce_v2_0_enable_mgcg(rdev, enable);
6736         }
6737 }
6738
6739 static void cik_init_cg(struct radeon_device *rdev)
6740 {
6741
6742         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6743
6744         if (rdev->has_uvd)
6745                 si_init_uvd_internal_cg(rdev);
6746
6747         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6748                              RADEON_CG_BLOCK_SDMA |
6749                              RADEON_CG_BLOCK_BIF |
6750                              RADEON_CG_BLOCK_UVD |
6751                              RADEON_CG_BLOCK_HDP), true);
6752 }
6753
6754 static void cik_fini_cg(struct radeon_device *rdev)
6755 {
6756         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6757                              RADEON_CG_BLOCK_SDMA |
6758                              RADEON_CG_BLOCK_BIF |
6759                              RADEON_CG_BLOCK_UVD |
6760                              RADEON_CG_BLOCK_HDP), false);
6761
6762         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6763 }
6764
6765 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6766                                           bool enable)
6767 {
6768         u32 data, orig;
6769
6770         orig = data = RREG32(RLC_PG_CNTL);
6771         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6772                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6773         else
6774                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6775         if (orig != data)
6776                 WREG32(RLC_PG_CNTL, data);
6777 }
6778
6779 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6780                                           bool enable)
6781 {
6782         u32 data, orig;
6783
6784         orig = data = RREG32(RLC_PG_CNTL);
6785         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6786                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6787         else
6788                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6789         if (orig != data)
6790                 WREG32(RLC_PG_CNTL, data);
6791 }
6792
6793 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6794 {
6795         u32 data, orig;
6796
6797         orig = data = RREG32(RLC_PG_CNTL);
6798         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6799                 data &= ~DISABLE_CP_PG;
6800         else
6801                 data |= DISABLE_CP_PG;
6802         if (orig != data)
6803                 WREG32(RLC_PG_CNTL, data);
6804 }
6805
6806 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6807 {
6808         u32 data, orig;
6809
6810         orig = data = RREG32(RLC_PG_CNTL);
6811         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6812                 data &= ~DISABLE_GDS_PG;
6813         else
6814                 data |= DISABLE_GDS_PG;
6815         if (orig != data)
6816                 WREG32(RLC_PG_CNTL, data);
6817 }
6818
6819 #define CP_ME_TABLE_SIZE    96
6820 #define CP_ME_TABLE_OFFSET  2048
6821 #define CP_MEC_TABLE_OFFSET 4096
6822
6823 void cik_init_cp_pg_table(struct radeon_device *rdev)
6824 {
6825         volatile u32 *dst_ptr;
6826         int me, i, max_me = 4;
6827         u32 bo_offset = 0;
6828         u32 table_offset, table_size;
6829
6830         if (rdev->family == CHIP_KAVERI)
6831                 max_me = 5;
6832
6833         if (rdev->rlc.cp_table_ptr == NULL)
6834                 return;
6835
6836         /* write the cp table buffer */
6837         dst_ptr = rdev->rlc.cp_table_ptr;
6838         for (me = 0; me < max_me; me++) {
6839                 if (rdev->new_fw) {
6840                         const __le32 *fw_data;
6841                         const struct gfx_firmware_header_v1_0 *hdr;
6842
6843                         if (me == 0) {
6844                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6845                                 fw_data = (const __le32 *)
6846                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6847                                 table_offset = le32_to_cpu(hdr->jt_offset);
6848                                 table_size = le32_to_cpu(hdr->jt_size);
6849                         } else if (me == 1) {
6850                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6851                                 fw_data = (const __le32 *)
6852                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6853                                 table_offset = le32_to_cpu(hdr->jt_offset);
6854                                 table_size = le32_to_cpu(hdr->jt_size);
6855                         } else if (me == 2) {
6856                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6857                                 fw_data = (const __le32 *)
6858                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6859                                 table_offset = le32_to_cpu(hdr->jt_offset);
6860                                 table_size = le32_to_cpu(hdr->jt_size);
6861                         } else if (me == 3) {
6862                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6863                                 fw_data = (const __le32 *)
6864                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6865                                 table_offset = le32_to_cpu(hdr->jt_offset);
6866                                 table_size = le32_to_cpu(hdr->jt_size);
6867                         } else {
6868                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6869                                 fw_data = (const __le32 *)
6870                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6871                                 table_offset = le32_to_cpu(hdr->jt_offset);
6872                                 table_size = le32_to_cpu(hdr->jt_size);
6873                         }
6874
6875                         for (i = 0; i < table_size; i ++) {
6876                                 dst_ptr[bo_offset + i] =
6877                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6878                         }
6879                         bo_offset += table_size;
6880                 } else {
6881                         const __be32 *fw_data;
6882                         table_size = CP_ME_TABLE_SIZE;
6883
6884                         if (me == 0) {
6885                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6886                                 table_offset = CP_ME_TABLE_OFFSET;
6887                         } else if (me == 1) {
6888                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6889                                 table_offset = CP_ME_TABLE_OFFSET;
6890                         } else if (me == 2) {
6891                                 fw_data = (const __be32 *)rdev->me_fw->data;
6892                                 table_offset = CP_ME_TABLE_OFFSET;
6893                         } else {
6894                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6895                                 table_offset = CP_MEC_TABLE_OFFSET;
6896                         }
6897
6898                         for (i = 0; i < table_size; i ++) {
6899                                 dst_ptr[bo_offset + i] =
6900                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6901                         }
6902                         bo_offset += table_size;
6903                 }
6904         }
6905 }
6906
6907 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6908                                 bool enable)
6909 {
6910         u32 data, orig;
6911
6912         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6913                 orig = data = RREG32(RLC_PG_CNTL);
6914                 data |= GFX_PG_ENABLE;
6915                 if (orig != data)
6916                         WREG32(RLC_PG_CNTL, data);
6917
6918                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6919                 data |= AUTO_PG_EN;
6920                 if (orig != data)
6921                         WREG32(RLC_AUTO_PG_CTRL, data);
6922         } else {
6923                 orig = data = RREG32(RLC_PG_CNTL);
6924                 data &= ~GFX_PG_ENABLE;
6925                 if (orig != data)
6926                         WREG32(RLC_PG_CNTL, data);
6927
6928                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6929                 data &= ~AUTO_PG_EN;
6930                 if (orig != data)
6931                         WREG32(RLC_AUTO_PG_CTRL, data);
6932
6933                 data = RREG32(DB_RENDER_CONTROL);
6934         }
6935 }
6936
6937 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6938 {
6939         u32 mask = 0, tmp, tmp1;
6940         int i;
6941
6942         mutex_lock(&rdev->grbm_idx_mutex);
6943         cik_select_se_sh(rdev, se, sh);
6944         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6945         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6946         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6947         mutex_unlock(&rdev->grbm_idx_mutex);
6948
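        /* Both registers keep their disabled-CU bits in the upper 16 bits:
         * OR the fused (CC_) and user (GC_USER_) masks together, shift them
         * down, and invert to obtain the active-CU bitmap for this SE/SH.
         */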
6949         tmp &= 0xffff0000;
6950
6951         tmp |= tmp1;
6952         tmp >>= 16;
6953
6954         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6955                 mask <<= 1;
6956                 mask |= 1;
6957         }
6958
6959         return (~tmp) & mask;
6960 }
6961
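/*
 * Build the RLC "always on" CU mask: for each SE/SH keep at most the first
 * two active CUs in RLC_PG_AO_CU_MASK (these stay powered during gfx power
 * gating) and program RLC_MAX_PG_CU with the total number of active CUs.
 */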
6962 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6963 {
6964         u32 i, j, k, active_cu_number = 0;
6965         u32 mask, counter, cu_bitmap;
6966         u32 tmp = 0;
6967
6968         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6969                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6970                         mask = 1;
6971                         cu_bitmap = 0;
6972                         counter = 0;
6973                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6974                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6975                                         if (counter < 2)
6976                                                 cu_bitmap |= mask;
6977                                         counter ++;
6978                                 }
6979                                 mask <<= 1;
6980                         }
6981
6982                         active_cu_number += counter;
6983                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6984                 }
6985         }
6986
6987         WREG32(RLC_PG_AO_CU_MASK, tmp);
6988
6989         tmp = RREG32(RLC_MAX_PG_CU);
6990         tmp &= ~MAX_PU_CU_MASK;
6991         tmp |= MAX_PU_CU(active_cu_number);
6992         WREG32(RLC_MAX_PG_CU, tmp);
6993 }
6994
6995 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6996                                        bool enable)
6997 {
6998         u32 data, orig;
6999
7000         orig = data = RREG32(RLC_PG_CNTL);
7001         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7002                 data |= STATIC_PER_CU_PG_ENABLE;
7003         else
7004                 data &= ~STATIC_PER_CU_PG_ENABLE;
7005         if (orig != data)
7006                 WREG32(RLC_PG_CNTL, data);
7007 }
7008
7009 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7010                                         bool enable)
7011 {
7012         u32 data, orig;
7013
7014         orig = data = RREG32(RLC_PG_CNTL);
7015         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7016                 data |= DYN_PER_CU_PG_ENABLE;
7017         else
7018                 data &= ~DYN_PER_CU_PG_ENABLE;
7019         if (orig != data)
7020                 WREG32(RLC_PG_CNTL, data);
7021 }
7022
7023 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7024 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7025
7026 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7027 {
7028         u32 data, orig;
7029         u32 i;
7030
7031         if (rdev->rlc.cs_data) {
7032                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7033                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7034                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7035                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7036         } else {
7037                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7038                 for (i = 0; i < 3; i++)
7039                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
7040         }
7041         if (rdev->rlc.reg_list) {
7042                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7043                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
7044                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7045         }
7046
7047         orig = data = RREG32(RLC_PG_CNTL);
7048         data |= GFX_PG_SRC;
7049         if (orig != data)
7050                 WREG32(RLC_PG_CNTL, data);
7051
7052         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7053         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7054
7055         data = RREG32(CP_RB_WPTR_POLL_CNTL);
7056         data &= ~IDLE_POLL_COUNT_MASK;
7057         data |= IDLE_POLL_COUNT(0x60);
7058         WREG32(CP_RB_WPTR_POLL_CNTL, data);
7059
7060         data = 0x10101010;
7061         WREG32(RLC_PG_DELAY, data);
7062
7063         data = RREG32(RLC_PG_DELAY_2);
7064         data &= ~0xff;
7065         data |= 0x3;
7066         WREG32(RLC_PG_DELAY_2, data);
7067
7068         data = RREG32(RLC_AUTO_PG_CTRL);
7069         data &= ~GRBM_REG_SGIT_MASK;
7070         data |= GRBM_REG_SGIT(0x700);
7071         WREG32(RLC_AUTO_PG_CTRL, data);
7072
7073 }
7074
7075 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7076 {
7077         cik_enable_gfx_cgpg(rdev, enable);
7078         cik_enable_gfx_static_mgpg(rdev, enable);
7079         cik_enable_gfx_dynamic_mgpg(rdev, enable);
7080 }
7081
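/*
 * Clear state buffer (CSB): a short PM4 stream holding the default ("golden")
 * context register values, referenced by the RLC clear-state descriptor and
 * replayed when the gfx context is (re)initialized.  cik_get_csb_size()
 * returns its length in dwords; cik_get_csb_buffer() fills it from the
 * cs_data tables.
 */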
7082 u32 cik_get_csb_size(struct radeon_device *rdev)
7083 {
7084         u32 count = 0;
7085         const struct cs_section_def *sect = NULL;
7086         const struct cs_extent_def *ext = NULL;
7087
7088         if (rdev->rlc.cs_data == NULL)
7089                 return 0;
7090
7091         /* begin clear state */
7092         count += 2;
7093         /* context control state */
7094         count += 3;
7095
7096         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7097                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7098                         if (sect->id == SECT_CONTEXT)
7099                                 count += 2 + ext->reg_count;
7100                         else
7101                                 return 0;
7102                 }
7103         }
7104         /* pa_sc_raster_config/pa_sc_raster_config1 */
7105         count += 4;
7106         /* end clear state */
7107         count += 2;
7108         /* clear state */
7109         count += 2;
7110
7111         return count;
7112 }
7113
7114 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7115 {
7116         u32 count = 0, i;
7117         const struct cs_section_def *sect = NULL;
7118         const struct cs_extent_def *ext = NULL;
7119
7120         if (rdev->rlc.cs_data == NULL)
7121                 return;
7122         if (buffer == NULL)
7123                 return;
7124
7125         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7126         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7127
7128         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7129         buffer[count++] = cpu_to_le32(0x80000000);
7130         buffer[count++] = cpu_to_le32(0x80000000);
7131
7132         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7133                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7134                         if (sect->id == SECT_CONTEXT) {
7135                                 buffer[count++] =
7136                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7137                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7138                                 for (i = 0; i < ext->reg_count; i++)
7139                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7140                         } else {
7141                                 return;
7142                         }
7143                 }
7144         }
7145
7146         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7147         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7148         switch (rdev->family) {
7149         case CHIP_BONAIRE:
7150                 buffer[count++] = cpu_to_le32(0x16000012);
7151                 buffer[count++] = cpu_to_le32(0x00000000);
7152                 break;
7153         case CHIP_KAVERI:
7154                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7155                 buffer[count++] = cpu_to_le32(0x00000000);
7156                 break;
7157         case CHIP_KABINI:
7158         case CHIP_MULLINS:
7159                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7160                 buffer[count++] = cpu_to_le32(0x00000000);
7161                 break;
7162         case CHIP_HAWAII:
7163                 buffer[count++] = cpu_to_le32(0x3a00161a);
7164                 buffer[count++] = cpu_to_le32(0x0000002e);
7165                 break;
7166         default:
7167                 buffer[count++] = cpu_to_le32(0x00000000);
7168                 buffer[count++] = cpu_to_le32(0x00000000);
7169                 break;
7170         }
7171
7172         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7173         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7174
7175         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7176         buffer[count++] = cpu_to_le32(0);
7177 }
7178
7179 static void cik_init_pg(struct radeon_device *rdev)
7180 {
7181         if (rdev->pg_flags) {
7182                 cik_enable_sck_slowdown_on_pu(rdev, true);
7183                 cik_enable_sck_slowdown_on_pd(rdev, true);
7184                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7185                         cik_init_gfx_cgpg(rdev);
7186                         cik_enable_cp_pg(rdev, true);
7187                         cik_enable_gds_pg(rdev, true);
7188                 }
7189                 cik_init_ao_cu_mask(rdev);
7190                 cik_update_gfx_pg(rdev, true);
7191         }
7192 }
7193
7194 static void cik_fini_pg(struct radeon_device *rdev)
7195 {
7196         if (rdev->pg_flags) {
7197                 cik_update_gfx_pg(rdev, false);
7198                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7199                         cik_enable_cp_pg(rdev, false);
7200                         cik_enable_gds_pg(rdev, false);
7201                 }
7202         }
7203 }
7204
7205 /*
7206  * Interrupts
7207  * Starting with r6xx, interrupts are handled via a ring buffer.
7208  * Ring buffers are areas of GPU accessible memory that the GPU
7209  * writes interrupt vectors into and the host reads vectors out of.
7210  * There is a rptr (read pointer) that determines where the
7211  * host is currently reading, and a wptr (write pointer)
7212  * which determines where the GPU has written.  When the
7213  * pointers are equal, the ring is idle.  When the GPU
7214  * writes vectors to the ring buffer, it increments the
7215  * wptr.  When there is an interrupt, the host then starts
7216  * fetching vectors and processing them until the pointers are
7217  * equal again at which point it updates the rptr.
7218  */
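
/*
 * Host-side consumption sketch (illustrative only; the real handler is
 * cik_irq_process() later in this file).  The helpers read_ih_wptr() and
 * handle_ih_vector() are placeholders, not driver functions.
 */
#if 0
static void cik_ih_consume_sketch(struct radeon_device *rdev)
{
	u32 wptr = read_ih_wptr(rdev);	/* where the GPU last wrote */
	u32 rptr = rdev->ih.rptr;	/* where the host last read */

	while (rptr != wptr) {
		handle_ih_vector(rdev, rptr);		/* decode one vector */
		rptr = (rptr + 16) & rdev->ih.ptr_mask;	/* 16 bytes per vector */
	}
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);	/* tell the GPU how far we got */
}
#endif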
7219
7220 /**
7221  * cik_enable_interrupts - Enable the interrupt ring buffer
7222  *
7223  * @rdev: radeon_device pointer
7224  *
7225  * Enable the interrupt ring buffer (CIK).
7226  */
7227 static void cik_enable_interrupts(struct radeon_device *rdev)
7228 {
7229         u32 ih_cntl = RREG32(IH_CNTL);
7230         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7231
7232         ih_cntl |= ENABLE_INTR;
7233         ih_rb_cntl |= IH_RB_ENABLE;
7234         WREG32(IH_CNTL, ih_cntl);
7235         WREG32(IH_RB_CNTL, ih_rb_cntl);
7236         rdev->ih.enabled = true;
7237 }
7238
7239 /**
7240  * cik_disable_interrupts - Disable the interrupt ring buffer
7241  *
7242  * @rdev: radeon_device pointer
7243  *
7244  * Disable the interrupt ring buffer (CIK).
7245  */
7246 static void cik_disable_interrupts(struct radeon_device *rdev)
7247 {
7248         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7249         u32 ih_cntl = RREG32(IH_CNTL);
7250
7251         ih_rb_cntl &= ~IH_RB_ENABLE;
7252         ih_cntl &= ~ENABLE_INTR;
7253         WREG32(IH_RB_CNTL, ih_rb_cntl);
7254         WREG32(IH_CNTL, ih_cntl);
7255         /* set rptr, wptr to 0 */
7256         WREG32(IH_RB_RPTR, 0);
7257         WREG32(IH_RB_WPTR, 0);
7258         rdev->ih.enabled = false;
7259         rdev->ih.rptr = 0;
7260 }
7261
7262 /**
7263  * cik_disable_interrupt_state - Disable all interrupt sources
7264  *
7265  * @rdev: radeon_device pointer
7266  *
7267  * Clear all interrupt enable bits used by the driver (CIK).
7268  */
7269 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7270 {
7271         u32 tmp;
7272
7273         /* gfx ring */
7274         tmp = RREG32(CP_INT_CNTL_RING0) &
7275                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7276         WREG32(CP_INT_CNTL_RING0, tmp);
7277         /* sdma */
7278         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7279         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7280         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7281         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7282         /* compute queues */
7283         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7284         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7285         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7286         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7287         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7288         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7289         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7290         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7291         /* grbm */
7292         WREG32(GRBM_INT_CNTL, 0);
7293         /* SRBM */
7294         WREG32(SRBM_INT_CNTL, 0);
7295         /* vline/vblank, etc. */
7296         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7297         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7298         if (rdev->num_crtc >= 4) {
7299                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7300                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7301         }
7302         if (rdev->num_crtc >= 6) {
7303                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7304                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7305         }
7306         /* pflip */
7307         if (rdev->num_crtc >= 2) {
7308                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7309                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7310         }
7311         if (rdev->num_crtc >= 4) {
7312                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7313                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7314         }
7315         if (rdev->num_crtc >= 6) {
7316                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7317                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7318         }
7319
7320         /* dac hotplug */
7321         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7322
7323         /* digital hotplug */
7324         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7325         WREG32(DC_HPD1_INT_CONTROL, tmp);
7326         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7327         WREG32(DC_HPD2_INT_CONTROL, tmp);
7328         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7329         WREG32(DC_HPD3_INT_CONTROL, tmp);
7330         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7331         WREG32(DC_HPD4_INT_CONTROL, tmp);
7332         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7333         WREG32(DC_HPD5_INT_CONTROL, tmp);
7334         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7335         WREG32(DC_HPD6_INT_CONTROL, tmp);
7336
7337 }
7338
7339 /**
7340  * cik_irq_init - init and enable the interrupt ring
7341  *
7342  * @rdev: radeon_device pointer
7343  *
7344  * Allocate a ring buffer for the interrupt controller,
7345  * enable the RLC, disable interrupts, set up the IH
7346  * ring buffer and enable it (CIK).
7347  * Called at device load and resume.
7348  * Returns 0 for success, errors for failure.
7349  */
7350 static int cik_irq_init(struct radeon_device *rdev)
7351 {
7352         int ret = 0;
7353         int rb_bufsz;
7354         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7355
7356         /* allocate ring */
7357         ret = r600_ih_ring_alloc(rdev);
7358         if (ret)
7359                 return ret;
7360
7361         /* disable irqs */
7362         cik_disable_interrupts(rdev);
7363
7364         /* init rlc */
7365         ret = cik_rlc_resume(rdev);
7366         if (ret) {
7367                 r600_ih_ring_fini(rdev);
7368                 return ret;
7369         }
7370
7371         /* setup interrupt control */
7372         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7373         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7374         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7375         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7376          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7377          */
7378         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7379         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7380         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7381         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7382
7383         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7384         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7385
7386         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7387                       IH_WPTR_OVERFLOW_CLEAR |
7388                       (rb_bufsz << 1));
7389
7390         if (rdev->wb.enabled)
7391                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7392
7393         /* set the writeback address whether it's enabled or not */
7394         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7395         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7396
7397         WREG32(IH_RB_CNTL, ih_rb_cntl);
7398
7399         /* set rptr, wptr to 0 */
7400         WREG32(IH_RB_RPTR, 0);
7401         WREG32(IH_RB_WPTR, 0);
7402
7403         /* Default settings for IH_CNTL (disabled at first) */
7404         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7405         /* RPTR_REARM only works if msi's are enabled */
7406         if (rdev->msi_enabled)
7407                 ih_cntl |= RPTR_REARM;
7408         WREG32(IH_CNTL, ih_cntl);
7409
7410         /* force the active interrupt state to all disabled */
7411         cik_disable_interrupt_state(rdev);
7412
7413         pci_set_master(rdev->pdev);
7414
7415         /* enable irqs */
7416         cik_enable_interrupts(rdev);
7417
7418         return ret;
7419 }
7420
7421 /**
7422  * cik_irq_set - enable/disable interrupt sources
7423  *
7424  * @rdev: radeon_device pointer
7425  *
7426  * Enable interrupt sources on the GPU (vblanks, hpd,
7427  * etc.) (CIK).
7428  * Returns 0 for success, errors for failure.
7429  */
7430 int cik_irq_set(struct radeon_device *rdev)
7431 {
7432         u32 cp_int_cntl;
7433         u32 cp_m1p0;
7434         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7435         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7436         u32 grbm_int_cntl = 0;
7437         u32 dma_cntl, dma_cntl1;
7438
7439         if (!rdev->irq.installed) {
7440                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7441                 return -EINVAL;
7442         }
7443         /* don't enable anything if the ih is disabled */
7444         if (!rdev->ih.enabled) {
7445                 cik_disable_interrupts(rdev);
7446                 /* force the active interrupt state to all disabled */
7447                 cik_disable_interrupt_state(rdev);
7448                 return 0;
7449         }
7450
7451         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7452                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7453         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7454
7455         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7456         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7457         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7458         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7459         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7460         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7461
7462         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7463         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7464
7465         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7466
7467         /* enable CP interrupts on all rings */
7468         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7469                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7470                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7471         }
7472         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7473                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7474                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7475                 if (ring->me == 1) {
7476                         switch (ring->pipe) {
7477                         case 0:
7478                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7479                                 break;
7480                         default:
7481                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7482                                 break;
7483                         }
7484                 } else {
7485                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7486                 }
7487         }
7488         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7489                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7490                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7491                 if (ring->me == 1) {
7492                         switch (ring->pipe) {
7493                         case 0:
7494                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7495                                 break;
7496                         default:
7497                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7498                                 break;
7499                         }
7500                 } else {
7501                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7502                 }
7503         }
7504
7505         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7506                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7507                 dma_cntl |= TRAP_ENABLE;
7508         }
7509
7510         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7511                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7512                 dma_cntl1 |= TRAP_ENABLE;
7513         }
7514
7515         if (rdev->irq.crtc_vblank_int[0] ||
7516             atomic_read(&rdev->irq.pflip[0])) {
7517                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7518                 crtc1 |= VBLANK_INTERRUPT_MASK;
7519         }
7520         if (rdev->irq.crtc_vblank_int[1] ||
7521             atomic_read(&rdev->irq.pflip[1])) {
7522                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7523                 crtc2 |= VBLANK_INTERRUPT_MASK;
7524         }
7525         if (rdev->irq.crtc_vblank_int[2] ||
7526             atomic_read(&rdev->irq.pflip[2])) {
7527                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7528                 crtc3 |= VBLANK_INTERRUPT_MASK;
7529         }
7530         if (rdev->irq.crtc_vblank_int[3] ||
7531             atomic_read(&rdev->irq.pflip[3])) {
7532                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7533                 crtc4 |= VBLANK_INTERRUPT_MASK;
7534         }
7535         if (rdev->irq.crtc_vblank_int[4] ||
7536             atomic_read(&rdev->irq.pflip[4])) {
7537                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7538                 crtc5 |= VBLANK_INTERRUPT_MASK;
7539         }
7540         if (rdev->irq.crtc_vblank_int[5] ||
7541             atomic_read(&rdev->irq.pflip[5])) {
7542                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7543                 crtc6 |= VBLANK_INTERRUPT_MASK;
7544         }
7545         if (rdev->irq.hpd[0]) {
7546                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7547                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7548         }
7549         if (rdev->irq.hpd[1]) {
7550                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7551                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7552         }
7553         if (rdev->irq.hpd[2]) {
7554                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7555                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7556         }
7557         if (rdev->irq.hpd[3]) {
7558                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7559                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7560         }
7561         if (rdev->irq.hpd[4]) {
7562                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7563                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7564         }
7565         if (rdev->irq.hpd[5]) {
7566                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7567                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7568         }
7569
7570         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7571
7572         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7573         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7574
7575         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7576
7577         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7578
7579         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7580         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7581         if (rdev->num_crtc >= 4) {
7582                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7583                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7584         }
7585         if (rdev->num_crtc >= 6) {
7586                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7587                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7588         }
7589
7590         if (rdev->num_crtc >= 2) {
7591                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7592                        GRPH_PFLIP_INT_MASK);
7593                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7594                        GRPH_PFLIP_INT_MASK);
7595         }
7596         if (rdev->num_crtc >= 4) {
7597                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7598                        GRPH_PFLIP_INT_MASK);
7599                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7600                        GRPH_PFLIP_INT_MASK);
7601         }
7602         if (rdev->num_crtc >= 6) {
7603                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7604                        GRPH_PFLIP_INT_MASK);
7605                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7606                        GRPH_PFLIP_INT_MASK);
7607         }
7608
7609         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7610         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7611         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7612         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7613         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7614         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7615
7616         /* posting read */
7617         RREG32(SRBM_STATUS);
7618
7619         return 0;
7620 }
7621
7622 /**
7623  * cik_irq_ack - ack interrupt sources
7624  *
7625  * @rdev: radeon_device pointer
7626  *
7627  * Ack interrupt sources on the GPU (vblanks, hpd,
7628  * etc.) (CIK).  Certain interrupt sources are sw
7629  * generated and do not require an explicit ack.
7630  */
7631 static inline void cik_irq_ack(struct radeon_device *rdev)
7632 {
7633         u32 tmp;
7634
7635         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7636         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7637         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7638         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7639         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7640         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7641         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7642
7643         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7644                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7645         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7646                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7647         if (rdev->num_crtc >= 4) {
7648                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7649                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7650                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7651                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7652         }
7653         if (rdev->num_crtc >= 6) {
7654                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7655                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7656                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7657                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7658         }
7659
7660         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7661                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7662                        GRPH_PFLIP_INT_CLEAR);
7663         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7664                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7665                        GRPH_PFLIP_INT_CLEAR);
7666         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7667                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7668         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7669                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7670         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7671                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7672         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7673                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7674
7675         if (rdev->num_crtc >= 4) {
7676                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7677                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7678                                GRPH_PFLIP_INT_CLEAR);
7679                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7680                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7681                                GRPH_PFLIP_INT_CLEAR);
7682                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7683                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7684                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7685                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7686                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7687                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7688                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7689                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7690         }
7691
7692         if (rdev->num_crtc >= 6) {
7693                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7694                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7695                                GRPH_PFLIP_INT_CLEAR);
7696                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7697                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7698                                GRPH_PFLIP_INT_CLEAR);
7699                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7700                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7701                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7702                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7703                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7704                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7705                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7706                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7707         }
7708
7709         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7710                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7711                 tmp |= DC_HPDx_INT_ACK;
7712                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7713         }
7714         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7715                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7716                 tmp |= DC_HPDx_INT_ACK;
7717                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7718         }
7719         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7720                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7721                 tmp |= DC_HPDx_INT_ACK;
7722                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7723         }
7724         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7725                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7726                 tmp |= DC_HPDx_INT_ACK;
7727                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7728         }
7729         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7730                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7731                 tmp |= DC_HPDx_INT_ACK;
7732                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7733         }
7734         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7735                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7736                 tmp |= DC_HPDx_INT_ACK;
7737                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7738         }
7739         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7740                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7741                 tmp |= DC_HPDx_RX_INT_ACK;
7742                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7743         }
7744         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7745                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7746                 tmp |= DC_HPDx_RX_INT_ACK;
7747                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7748         }
7749         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7750                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7751                 tmp |= DC_HPDx_RX_INT_ACK;
7752                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7753         }
7754         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7755                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7756                 tmp |= DC_HPDx_RX_INT_ACK;
7757                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7758         }
7759         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7760                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7761                 tmp |= DC_HPDx_RX_INT_ACK;
7762                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7763         }
7764         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7765                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7766                 tmp |= DC_HPDx_RX_INT_ACK;
7767                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7768         }
7769 }
7770
7771 /**
7772  * cik_irq_disable - disable interrupts
7773  *
7774  * @rdev: radeon_device pointer
7775  *
7776  * Disable interrupts on the hw (CIK).
7777  */
7778 static void cik_irq_disable(struct radeon_device *rdev)
7779 {
7780         cik_disable_interrupts(rdev);
7781         /* Wait and acknowledge irq */
7782         mdelay(1);
7783         cik_irq_ack(rdev);
7784         cik_disable_interrupt_state(rdev);
7785 }
7786
7787 /**
7788  * cik_irq_suspend - disable interrupts for suspend
7789  *
7790  * @rdev: radeon_device pointer
7791  *
7792  * Disable interrupts and stop the RLC (CIK).
7793  * Used for suspend.
7794  */
7795 static void cik_irq_suspend(struct radeon_device *rdev)
7796 {
7797         cik_irq_disable(rdev);
7798         cik_rlc_stop(rdev);
7799 }
7800
7801 /**
7802  * cik_irq_fini - tear down interrupt support
7803  *
7804  * @rdev: radeon_device pointer
7805  *
7806  * Disable interrupts on the hw and free the IH ring
7807  * buffer (CIK).
7808  * Used for driver unload.
7809  */
7810 static void cik_irq_fini(struct radeon_device *rdev)
7811 {
7812         cik_irq_suspend(rdev);
7813         r600_ih_ring_fini(rdev);
7814 }
7815
7816 /**
7817  * cik_get_ih_wptr - get the IH ring buffer wptr
7818  *
7819  * @rdev: radeon_device pointer
7820  *
7821  * Get the IH ring buffer wptr from either the register
7822  * or the writeback memory buffer (CIK).  Also check for
7823  * ring buffer overflow and deal with it.
7824  * Used by cik_irq_process().
7825  * Returns the value of the wptr.
7826  */
7827 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7828 {
7829         u32 wptr, tmp;
7830
7831         if (rdev->wb.enabled)
7832                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7833         else
7834                 wptr = RREG32(IH_RB_WPTR);
7835
7836         if (wptr & RB_OVERFLOW) {
7837                 wptr &= ~RB_OVERFLOW;
7838                 /* When a ring buffer overflow happens, start parsing interrupts
7839                  * from the last not-overwritten vector (wptr + 16). Hopefully
7840                  * this should allow us to catch up.
7841                  */
7842                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7843                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7844                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7845                 tmp = RREG32(IH_RB_CNTL);
7846                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7847                 WREG32(IH_RB_CNTL, tmp);
7848         }
7849         return (wptr & rdev->ih.ptr_mask);
7850 }
7851
7852 /*        CIK IV Ring
7853  * Each IV ring entry is 128 bits:
7854  * [7:0]    - interrupt source id
7855  * [31:8]   - reserved
7856  * [59:32]  - interrupt source data
7857  * [63:60]  - reserved
7858  * [71:64]  - RINGID
7859  *            CP:
7860  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7861  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7862  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7863  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7864  *            PIPE_ID - ME0 0=3D
7865  *                    - ME1&2 compute dispatcher (4 pipes each)
7866  *            SDMA:
7867  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7868  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7869  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7870  * [79:72]  - VMID
7871  * [95:80]  - PASID
7872  * [127:96] - reserved
7873  */
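/* Illustrative sketch only (not part of the driver source): decoding the CP
 * RINGID byte of an IV entry into its ME/PIPE/QUEUE fields according to the
 * layout described above.  The helper name is hypothetical; cik_irq_process()
 * below open-codes the same masks and shifts where it needs them.
 */
#if 0
static inline void cik_iv_decode_cp_ringid(u32 ring_id, u8 *me_id,
					   u8 *pipe_id, u8 *queue_id)
{
	*me_id    = (ring_id & 0x60) >> 5; /* ME_ID    [6:5] */
	*pipe_id  = (ring_id & 0x18) >> 3; /* PIPE_ID  [4:3] */
	*queue_id = (ring_id & 0x07) >> 0; /* QUEUE_ID [2:0] */
}
#endif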
7874 /**
7875  * cik_irq_process - interrupt handler
7876  *
7877  * @rdev: radeon_device pointer
7878  *
7879  * Interrupt handler (CIK).  Walk the IH ring,
7880  * ack interrupts and schedule work to handle
7881  * interrupt events.
7882  * Returns irq process return code.
7883  */
7884 int cik_irq_process(struct radeon_device *rdev)
7885 {
7886         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7887         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7888         u32 wptr;
7889         u32 rptr;
7890         u32 src_id, src_data, ring_id;
7891         u8 me_id, pipe_id, queue_id;
7892         u32 ring_index;
7893         bool queue_hotplug = false;
7894         bool queue_dp = false;
7895         bool queue_reset = false;
7896         u32 addr, status, mc_client;
7897         bool queue_thermal = false;
7898
7899         if (!rdev->ih.enabled || rdev->shutdown)
7900                 return IRQ_NONE;
7901
7902         wptr = cik_get_ih_wptr(rdev);
7903
7904 restart_ih:
7905         /* is somebody else already processing irqs? */
7906         if (atomic_xchg(&rdev->ih.lock, 1))
7907                 return IRQ_NONE;
7908
7909         rptr = rdev->ih.rptr;
7910         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7911
7912         /* Order reading of wptr vs. reading of IH ring data */
7913         rmb();
7914
7915         /* display interrupts */
7916         cik_irq_ack(rdev);
7917
7918         while (rptr != wptr) {
7919                 /* wptr/rptr are in bytes! */
7920                 ring_index = rptr / 4;
7921
7922                 radeon_kfd_interrupt(rdev,
7923                                 (const void *) &rdev->ih.ring[ring_index]);
7924
7925                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7926                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7927                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7928
7929                 switch (src_id) {
7930                 case 1: /* D1 vblank/vline */
7931                         switch (src_data) {
7932                         case 0: /* D1 vblank */
7933                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7934                                         if (rdev->irq.crtc_vblank_int[0]) {
7935                                                 drm_handle_vblank(rdev->ddev, 0);
7936                                                 rdev->pm.vblank_sync = true;
7937                                                 wake_up(&rdev->irq.vblank_queue);
7938                                         }
7939                                         if (atomic_read(&rdev->irq.pflip[0]))
7940                                                 radeon_crtc_handle_vblank(rdev, 0);
7941                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7942                                         DRM_DEBUG("IH: D1 vblank\n");
7943                                 }
7944                                 break;
7945                         case 1: /* D1 vline */
7946                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7947                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7948                                         DRM_DEBUG("IH: D1 vline\n");
7949                                 }
7950                                 break;
7951                         default:
7952                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7953                                 break;
7954                         }
7955                         break;
7956                 case 2: /* D2 vblank/vline */
7957                         switch (src_data) {
7958                         case 0: /* D2 vblank */
7959                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7960                                         if (rdev->irq.crtc_vblank_int[1]) {
7961                                                 drm_handle_vblank(rdev->ddev, 1);
7962                                                 rdev->pm.vblank_sync = true;
7963                                                 wake_up(&rdev->irq.vblank_queue);
7964                                         }
7965                                         if (atomic_read(&rdev->irq.pflip[1]))
7966                                                 radeon_crtc_handle_vblank(rdev, 1);
7967                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7968                                         DRM_DEBUG("IH: D2 vblank\n");
7969                                 }
7970                                 break;
7971                         case 1: /* D2 vline */
7972                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7973                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7974                                         DRM_DEBUG("IH: D2 vline\n");
7975                                 }
7976                                 break;
7977                         default:
7978                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7979                                 break;
7980                         }
7981                         break;
7982                 case 3: /* D3 vblank/vline */
7983                         switch (src_data) {
7984                         case 0: /* D3 vblank */
7985                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7986                                         if (rdev->irq.crtc_vblank_int[2]) {
7987                                                 drm_handle_vblank(rdev->ddev, 2);
7988                                                 rdev->pm.vblank_sync = true;
7989                                                 wake_up(&rdev->irq.vblank_queue);
7990                                         }
7991                                         if (atomic_read(&rdev->irq.pflip[2]))
7992                                                 radeon_crtc_handle_vblank(rdev, 2);
7993                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7994                                         DRM_DEBUG("IH: D3 vblank\n");
7995                                 }
7996                                 break;
7997                         case 1: /* D3 vline */
7998                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7999                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8000                                         DRM_DEBUG("IH: D3 vline\n");
8001                                 }
8002                                 break;
8003                         default:
8004                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8005                                 break;
8006                         }
8007                         break;
8008                 case 4: /* D4 vblank/vline */
8009                         switch (src_data) {
8010                         case 0: /* D4 vblank */
8011                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
8012                                         if (rdev->irq.crtc_vblank_int[3]) {
8013                                                 drm_handle_vblank(rdev->ddev, 3);
8014                                                 rdev->pm.vblank_sync = true;
8015                                                 wake_up(&rdev->irq.vblank_queue);
8016                                         }
8017                                         if (atomic_read(&rdev->irq.pflip[3]))
8018                                                 radeon_crtc_handle_vblank(rdev, 3);
8019                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8020                                         DRM_DEBUG("IH: D4 vblank\n");
8021                                 }
8022                                 break;
8023                         case 1: /* D4 vline */
8024                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
8025                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8026                                         DRM_DEBUG("IH: D4 vline\n");
8027                                 }
8028                                 break;
8029                         default:
8030                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8031                                 break;
8032                         }
8033                         break;
8034                 case 5: /* D5 vblank/vline */
8035                         switch (src_data) {
8036                         case 0: /* D5 vblank */
8037                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
8038                                         if (rdev->irq.crtc_vblank_int[4]) {
8039                                                 drm_handle_vblank(rdev->ddev, 4);
8040                                                 rdev->pm.vblank_sync = true;
8041                                                 wake_up(&rdev->irq.vblank_queue);
8042                                         }
8043                                         if (atomic_read(&rdev->irq.pflip[4]))
8044                                                 radeon_crtc_handle_vblank(rdev, 4);
8045                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8046                                         DRM_DEBUG("IH: D5 vblank\n");
8047                                 }
8048                                 break;
8049                         case 1: /* D5 vline */
8050                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
8051                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8052                                         DRM_DEBUG("IH: D5 vline\n");
8053                                 }
8054                                 break;
8055                         default:
8056                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8057                                 break;
8058                         }
8059                         break;
8060                 case 6: /* D6 vblank/vline */
8061                         switch (src_data) {
8062                         case 0: /* D6 vblank */
8063                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
8064                                         if (rdev->irq.crtc_vblank_int[5]) {
8065                                                 drm_handle_vblank(rdev->ddev, 5);
8066                                                 rdev->pm.vblank_sync = true;
8067                                                 wake_up(&rdev->irq.vblank_queue);
8068                                         }
8069                                         if (atomic_read(&rdev->irq.pflip[5]))
8070                                                 radeon_crtc_handle_vblank(rdev, 5);
8071                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8072                                         DRM_DEBUG("IH: D6 vblank\n");
8073                                 }
8074                                 break;
8075                         case 1: /* D6 vline */
8076                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
8077                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8078                                         DRM_DEBUG("IH: D6 vline\n");
8079                                 }
8080                                 break;
8081                         default:
8082                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8083                                 break;
8084                         }
8085                         break;
8086                 case 8: /* D1 page flip */
8087                 case 10: /* D2 page flip */
8088                 case 12: /* D3 page flip */
8089                 case 14: /* D4 page flip */
8090                 case 16: /* D5 page flip */
8091                 case 18: /* D6 page flip */
8092                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8093                         if (radeon_use_pflipirq > 0)
8094                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8095                         break;
8096                 case 42: /* HPD hotplug */
8097                         switch (src_data) {
8098                         case 0:
8099                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8100                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8101                                         queue_hotplug = true;
8102                                         DRM_DEBUG("IH: HPD1\n");
8103                                 }
8104                                 break;
8105                         case 1:
8106                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8107                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8108                                         queue_hotplug = true;
8109                                         DRM_DEBUG("IH: HPD2\n");
8110                                 }
8111                                 break;
8112                         case 2:
8113                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8114                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8115                                         queue_hotplug = true;
8116                                         DRM_DEBUG("IH: HPD3\n");
8117                                 }
8118                                 break;
8119                         case 3:
8120                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8121                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8122                                         queue_hotplug = true;
8123                                         DRM_DEBUG("IH: HPD4\n");
8124                                 }
8125                                 break;
8126                         case 4:
8127                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8128                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8129                                         queue_hotplug = true;
8130                                         DRM_DEBUG("IH: HPD5\n");
8131                                 }
8132                                 break;
8133                         case 5:
8134                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8135                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8136                                         queue_hotplug = true;
8137                                         DRM_DEBUG("IH: HPD6\n");
8138                                 }
8139                                 break;
8140                         case 6:
8141                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
8142                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8143                                         queue_dp = true;
8144                                         DRM_DEBUG("IH: HPD_RX 1\n");
8145                                 }
8146                                 break;
8147                         case 7:
8148                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
8149                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8150                                         queue_dp = true;
8151                                         DRM_DEBUG("IH: HPD_RX 2\n");
8152                                 }
8153                                 break;
8154                         case 8:
8155                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
8156                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8157                                         queue_dp = true;
8158                                         DRM_DEBUG("IH: HPD_RX 3\n");
8159                                 }
8160                                 break;
8161                         case 9:
8162                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
8163                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8164                                         queue_dp = true;
8165                                         DRM_DEBUG("IH: HPD_RX 4\n");
8166                                 }
8167                                 break;
8168                         case 10:
8169                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
8170                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8171                                         queue_dp = true;
8172                                         DRM_DEBUG("IH: HPD_RX 5\n");
8173                                 }
8174                                 break;
8175                         case 11:
8176                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
8177                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8178                                         queue_dp = true;
8179                                         DRM_DEBUG("IH: HPD_RX 6\n");
8180                                 }
8181                                 break;
8182                         default:
8183                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8184                                 break;
8185                         }
8186                         break;
8187                 case 96:
8188                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8189                         WREG32(SRBM_INT_ACK, 0x1);
8190                         break;
8191                 case 124: /* UVD */
8192                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8193                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8194                         break;
8195                 case 146:
8196                 case 147:
8197                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8198                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8199                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8200                         /* reset addr and status */
8201                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8202                         if (addr == 0x0 && status == 0x0)
8203                                 break;
8204                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8205                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8206                                 addr);
8207                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8208                                 status);
8209                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8210                         break;
8211                 case 167: /* VCE */
8212                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8213                         switch (src_data) {
8214                         case 0:
8215                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8216                                 break;
8217                         case 1:
8218                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8219                                 break;
8220                         default:
8221                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8222                                 break;
8223                         }
8224                         break;
8225                 case 176: /* GFX RB CP_INT */
8226                 case 177: /* GFX IB CP_INT */
8227                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8228                         break;
8229                 case 181: /* CP EOP event */
8230                         DRM_DEBUG("IH: CP EOP\n");
8231                         /* XXX check the bitfield order! */
8232                         me_id = (ring_id & 0x60) >> 5;
8233                         pipe_id = (ring_id & 0x18) >> 3;
8234                         queue_id = (ring_id & 0x7) >> 0;
8235                         switch (me_id) {
8236                         case 0:
8237                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8238                                 break;
8239                         case 1:
8240                         case 2:
8241                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8242                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8243                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8244                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8245                                 break;
8246                         }
8247                         break;
8248                 case 184: /* CP Privileged reg access */
8249                         DRM_ERROR("Illegal register access in command stream\n");
8250                         /* XXX check the bitfield order! */
8251                         me_id = (ring_id & 0x60) >> 5;
8252                         pipe_id = (ring_id & 0x18) >> 3;
8253                         queue_id = (ring_id & 0x7) >> 0;
8254                         switch (me_id) {
8255                         case 0:
8256                                 /* This results in a full GPU reset, but all we need to do is soft
8257                                  * reset the CP for gfx
8258                                  */
8259                                 queue_reset = true;
8260                                 break;
8261                         case 1:
8262                                 /* XXX compute */
8263                                 queue_reset = true;
8264                                 break;
8265                         case 2:
8266                                 /* XXX compute */
8267                                 queue_reset = true;
8268                                 break;
8269                         }
8270                         break;
8271                 case 185: /* CP Privileged inst */
8272                         DRM_ERROR("Illegal instruction in command stream\n");
8273                         /* XXX check the bitfield order! */
8274                         me_id = (ring_id & 0x60) >> 5;
8275                         pipe_id = (ring_id & 0x18) >> 3;
8276                         queue_id = (ring_id & 0x7) >> 0;
8277                         switch (me_id) {
8278                         case 0:
8279                                 /* This results in a full GPU reset, but all we need to do is soft
8280                                  * reset the CP for gfx
8281                                  */
8282                                 queue_reset = true;
8283                                 break;
8284                         case 1:
8285                                 /* XXX compute */
8286                                 queue_reset = true;
8287                                 break;
8288                         case 2:
8289                                 /* XXX compute */
8290                                 queue_reset = true;
8291                                 break;
8292                         }
8293                         break;
8294                 case 224: /* SDMA trap event */
8295                         /* XXX check the bitfield order! */
8296                         me_id = (ring_id & 0x3) >> 0;
8297                         queue_id = (ring_id & 0xc) >> 2;
8298                         DRM_DEBUG("IH: SDMA trap\n");
8299                         switch (me_id) {
8300                         case 0:
8301                                 switch (queue_id) {
8302                                 case 0:
8303                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8304                                         break;
8305                                 case 1:
8306                                         /* XXX compute */
8307                                         break;
8308                                 case 2:
8309                                         /* XXX compute */
8310                                         break;
8311                                 }
8312                                 break;
8313                         case 1:
8314                                 switch (queue_id) {
8315                                 case 0:
8316                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8317                                         break;
8318                                 case 1:
8319                                         /* XXX compute */
8320                                         break;
8321                                 case 2:
8322                                         /* XXX compute */
8323                                         break;
8324                                 }
8325                                 break;
8326                         }
8327                         break;
8328                 case 230: /* thermal low to high */
8329                         DRM_DEBUG("IH: thermal low to high\n");
8330                         rdev->pm.dpm.thermal.high_to_low = false;
8331                         queue_thermal = true;
8332                         break;
8333                 case 231: /* thermal high to low */
8334                         DRM_DEBUG("IH: thermal high to low\n");
8335                         rdev->pm.dpm.thermal.high_to_low = true;
8336                         queue_thermal = true;
8337                         break;
8338                 case 233: /* GUI IDLE */
8339                         DRM_DEBUG("IH: GUI idle\n");
8340                         break;
8341                 case 241: /* SDMA Privileged inst */
8342                 case 247: /* SDMA Privileged inst */
8343                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8344                         /* XXX check the bitfield order! */
8345                         me_id = (ring_id & 0x3) >> 0;
8346                         queue_id = (ring_id & 0xc) >> 2;
8347                         switch (me_id) {
8348                         case 0:
8349                                 switch (queue_id) {
8350                                 case 0:
8351                                         queue_reset = true;
8352                                         break;
8353                                 case 1:
8354                                         /* XXX compute */
8355                                         queue_reset = true;
8356                                         break;
8357                                 case 2:
8358                                         /* XXX compute */
8359                                         queue_reset = true;
8360                                         break;
8361                                 }
8362                                 break;
8363                         case 1:
8364                                 switch (queue_id) {
8365                                 case 0:
8366                                         queue_reset = true;
8367                                         break;
8368                                 case 1:
8369                                         /* XXX compute */
8370                                         queue_reset = true;
8371                                         break;
8372                                 case 2:
8373                                         /* XXX compute */
8374                                         queue_reset = true;
8375                                         break;
8376                                 }
8377                                 break;
8378                         }
8379                         break;
8380                 default:
8381                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8382                         break;
8383                 }
8384
8385                 /* wptr/rptr are in bytes! */
8386                 rptr += 16;
8387                 rptr &= rdev->ih.ptr_mask;
8388                 WREG32(IH_RB_RPTR, rptr);
8389         }
8390         if (queue_dp)
8391                 schedule_work(&rdev->dp_work);
8392         if (queue_hotplug)
8393                 schedule_work(&rdev->hotplug_work);
8394         if (queue_reset) {
8395                 rdev->needs_reset = true;
8396                 wake_up_all(&rdev->fence_queue);
8397         }
8398         if (queue_thermal)
8399                 schedule_work(&rdev->pm.dpm.thermal.work);
8400         rdev->ih.rptr = rptr;
8401         atomic_set(&rdev->ih.lock, 0);
8402
8403         /* make sure wptr hasn't changed while processing */
8404         wptr = cik_get_ih_wptr(rdev);
8405         if (wptr != rptr)
8406                 goto restart_ih;
8407
8408         return IRQ_HANDLED;
8409 }
8410
8411 /*
8412  * startup/shutdown callbacks
8413  */
8414 /**
8415  * cik_startup - program the asic to a functional state
8416  *
8417  * @rdev: radeon_device pointer
8418  *
8419  * Programs the asic to a functional state (CIK).
8420  * Called by cik_init() and cik_resume().
8421  * Returns 0 for success, error for failure.
8422  */
8423 static int cik_startup(struct radeon_device *rdev)
8424 {
8425         struct radeon_ring *ring;
8426         u32 nop;
8427         int r;
8428
8429         /* enable pcie gen2/3 link */
8430         cik_pcie_gen3_enable(rdev);
8431         /* enable aspm */
8432         cik_program_aspm(rdev);
8433
8434         /* scratch needs to be initialized before MC */
8435         r = r600_vram_scratch_init(rdev);
8436         if (r)
8437                 return r;
8438
8439         cik_mc_program(rdev);
8440
8441         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8442                 r = ci_mc_load_microcode(rdev);
8443                 if (r) {
8444                         DRM_ERROR("Failed to load MC firmware!\n");
8445                         return r;
8446                 }
8447         }
8448
8449         r = cik_pcie_gart_enable(rdev);
8450         if (r)
8451                 return r;
8452         cik_gpu_init(rdev);
8453
8454         /* allocate rlc buffers */
8455         if (rdev->flags & RADEON_IS_IGP) {
8456                 if (rdev->family == CHIP_KAVERI) {
8457                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8458                         rdev->rlc.reg_list_size =
8459                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8460                 } else {
8461                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8462                         rdev->rlc.reg_list_size =
8463                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8464                 }
8465         }
8466         rdev->rlc.cs_data = ci_cs_data;
8467         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8468         r = sumo_rlc_init(rdev);
8469         if (r) {
8470                 DRM_ERROR("Failed to init rlc BOs!\n");
8471                 return r;
8472         }
8473
8474         /* allocate wb buffer */
8475         r = radeon_wb_init(rdev);
8476         if (r)
8477                 return r;
8478
8479         /* allocate mec buffers */
8480         r = cik_mec_init(rdev);
8481         if (r) {
8482                 DRM_ERROR("Failed to init MEC BOs!\n");
8483                 return r;
8484         }
8485
8486         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8487         if (r) {
8488                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8489                 return r;
8490         }
8491
8492         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8493         if (r) {
8494                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8495                 return r;
8496         }
8497
8498         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8499         if (r) {
8500                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8501                 return r;
8502         }
8503
8504         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8505         if (r) {
8506                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8507                 return r;
8508         }
8509
8510         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8511         if (r) {
8512                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8513                 return r;
8514         }
8515
8516         r = radeon_uvd_resume(rdev);
8517         if (!r) {
8518                 r = uvd_v4_2_resume(rdev);
8519                 if (!r) {
8520                         r = radeon_fence_driver_start_ring(rdev,
8521                                                            R600_RING_TYPE_UVD_INDEX);
8522                         if (r)
8523                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8524                 }
8525         }
8526         if (r)
8527                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8528
8529         r = radeon_vce_resume(rdev);
8530         if (!r) {
8531                 r = vce_v2_0_resume(rdev);
8532                 if (!r)
8533                         r = radeon_fence_driver_start_ring(rdev,
8534                                                            TN_RING_TYPE_VCE1_INDEX);
8535                 if (!r)
8536                         r = radeon_fence_driver_start_ring(rdev,
8537                                                            TN_RING_TYPE_VCE2_INDEX);
8538         }
8539         if (r) {
8540                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8541                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8542                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8543         }
8544
8545         /* Enable IRQ */
8546         if (!rdev->irq.installed) {
8547                 r = radeon_irq_kms_init(rdev);
8548                 if (r)
8549                         return r;
8550         }
8551
8552         r = cik_irq_init(rdev);
8553         if (r) {
8554                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8555                 radeon_irq_kms_fini(rdev);
8556                 return r;
8557         }
8558         cik_irq_set(rdev);
8559
8560         if (rdev->family == CHIP_HAWAII) {
8561                 if (rdev->new_fw)
8562                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8563                 else
8564                         nop = RADEON_CP_PACKET2;
8565         } else {
8566                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8567         }
8568
8569         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8570         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8571                              nop);
8572         if (r)
8573                 return r;
8574
8575         /* set up the compute queues */
8576         /* type-2 packets are deprecated on MEC, use type-3 instead */
8577         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8578         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8579                              nop);
8580         if (r)
8581                 return r;
8582         ring->me = 1; /* first MEC */
8583         ring->pipe = 0; /* first pipe */
8584         ring->queue = 0; /* first queue */
8585         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8586
8587         /* type-2 packets are deprecated on MEC, use type-3 instead */
8588         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8589         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8590                              nop);
8591         if (r)
8592                 return r;
8593         /* dGPUs only have 1 MEC */
8594         ring->me = 1; /* first MEC */
8595         ring->pipe = 0; /* first pipe */
8596         ring->queue = 1; /* second queue */
8597         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8598
8599         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8600         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8601                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8602         if (r)
8603                 return r;
8604
8605         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8606         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8607                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8608         if (r)
8609                 return r;
8610
8611         r = cik_cp_resume(rdev);
8612         if (r)
8613                 return r;
8614
8615         r = cik_sdma_resume(rdev);
8616         if (r)
8617                 return r;
8618
8619         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8620         if (ring->ring_size) {
8621                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8622                                      RADEON_CP_PACKET2);
8623                 if (!r)
8624                         r = uvd_v1_0_init(rdev);
8625                 if (r)
8626                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8627         }
8628
8629         r = -ENOENT;
8630
8631         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8632         if (ring->ring_size)
8633                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8634                                      VCE_CMD_NO_OP);
8635
8636         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8637         if (ring->ring_size)
8638                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8639                                      VCE_CMD_NO_OP);
8640
8641         if (!r)
8642                 r = vce_v1_0_init(rdev);
8643         else if (r != -ENOENT)
8644                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8645
8646         r = radeon_ib_pool_init(rdev);
8647         if (r) {
8648                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8649                 return r;
8650         }
8651
8652         r = radeon_vm_manager_init(rdev);
8653         if (r) {
8654                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8655                 return r;
8656         }
8657
8658         r = radeon_audio_init(rdev);
8659         if (r)
8660                 return r;
8661
8662         r = radeon_kfd_resume(rdev);
8663         if (r)
8664                 return r;
8665
8666         return 0;
8667 }
8668
8669 /**
8670  * cik_resume - resume the asic to a functional state
8671  *
8672  * @rdev: radeon_device pointer
8673  *
8674  * Programs the asic to a functional state (CIK).
8675  * Called at resume.
8676  * Returns 0 for success, error for failure.
8677  */
8678 int cik_resume(struct radeon_device *rdev)
8679 {
8680         int r;
8681
8682         /* post card */
8683         atom_asic_init(rdev->mode_info.atom_context);
8684
8685         /* init golden registers */
8686         cik_init_golden_registers(rdev);
8687
8688         if (rdev->pm.pm_method == PM_METHOD_DPM)
8689                 radeon_pm_resume(rdev);
8690
8691         rdev->accel_working = true;
8692         r = cik_startup(rdev);
8693         if (r) {
8694                 DRM_ERROR("cik startup failed on resume\n");
8695                 rdev->accel_working = false;
8696                 return r;
8697         }
8698
8699         return r;
8700
8701 }
8702
8703 /**
8704  * cik_suspend - suspend the asic
8705  *
8706  * @rdev: radeon_device pointer
8707  *
8708  * Bring the chip into a state suitable for suspend (CIK).
8709  * Called at suspend.
8710  * Returns 0 for success.
8711  */
8712 int cik_suspend(struct radeon_device *rdev)
8713 {
8714         radeon_kfd_suspend(rdev);
8715         radeon_pm_suspend(rdev);
8716         radeon_audio_fini(rdev);
8717         radeon_vm_manager_fini(rdev);
8718         cik_cp_enable(rdev, false);
8719         cik_sdma_enable(rdev, false);
8720         uvd_v1_0_fini(rdev);
8721         radeon_uvd_suspend(rdev);
8722         radeon_vce_suspend(rdev);
8723         cik_fini_pg(rdev);
8724         cik_fini_cg(rdev);
8725         cik_irq_suspend(rdev);
8726         radeon_wb_disable(rdev);
8727         cik_pcie_gart_disable(rdev);
8728         return 0;
8729 }
8730
8731 /* The plan is to move initialization into this function and use
8732  * helper functions so that radeon_device_init does pretty much
8733  * nothing more than call asic specific functions. This should
8734  * also allow removal of a bunch of callback functions
8735  * like vram_info.
8736  */
8737 /**
8738  * cik_init - asic specific driver and hw init
8739  *
8740  * @rdev: radeon_device pointer
8741  *
8742  * Set up asic specific driver variables and program the hw
8743  * to a functional state (CIK).
8744  * Called at driver startup.
8745  * Returns 0 for success, errors for failure.
8746  */
8747 int cik_init(struct radeon_device *rdev)
8748 {
8749         struct radeon_ring *ring;
8750         int r;
8751
8752         /* Read BIOS */
8753         if (!radeon_get_bios(rdev)) {
8754                 if (ASIC_IS_AVIVO(rdev))
8755                         return -EINVAL;
8756         }
8757         /* Must be an ATOMBIOS */
8758         if (!rdev->is_atom_bios) {
8759                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8760                 return -EINVAL;
8761         }
8762         r = radeon_atombios_init(rdev);
8763         if (r)
8764                 return r;
8765
8766         /* Post card if necessary */
8767         if (!radeon_card_posted(rdev)) {
8768                 if (!rdev->bios) {
8769                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8770                         return -EINVAL;
8771                 }
8772                 DRM_INFO("GPU not posted. posting now...\n");
8773                 atom_asic_init(rdev->mode_info.atom_context);
8774         }
8775         /* init golden registers */
8776         cik_init_golden_registers(rdev);
8777         /* Initialize scratch registers */
8778         cik_scratch_init(rdev);
8779         /* Initialize surface registers */
8780         radeon_surface_init(rdev);
8781         /* Initialize clocks */
8782         radeon_get_clock_info(rdev->ddev);
8783
8784         /* Fence driver */
8785         r = radeon_fence_driver_init(rdev);
8786         if (r)
8787                 return r;
8788
8789         /* initialize memory controller */
8790         r = cik_mc_init(rdev);
8791         if (r)
8792                 return r;
8793         /* Memory manager */
8794         r = radeon_bo_init(rdev);
8795         if (r)
8796                 return r;
8797
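        /* CIK IGPs have no MC firmware image, so only discrete parts
         * additionally check for mc_fw before (re)loading the microcode.
         */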
8798         if (rdev->flags & RADEON_IS_IGP) {
8799                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8800                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8801                         r = cik_init_microcode(rdev);
8802                         if (r) {
8803                                 DRM_ERROR("Failed to load firmware!\n");
8804                                 return r;
8805                         }
8806                 }
8807         } else {
8808                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8809                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8810                     !rdev->mc_fw) {
8811                         r = cik_init_microcode(rdev);
8812                         if (r) {
8813                                 DRM_ERROR("Failed to load firmware!\n");
8814                                 return r;
8815                         }
8816                 }
8817         }
8818
8819         /* Initialize power management */
8820         radeon_pm_init(rdev);
8821
8822         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8823         ring->ring_obj = NULL;
8824         r600_ring_init(rdev, ring, 1024 * 1024);
8825
8826         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8827         ring->ring_obj = NULL;
8828         r600_ring_init(rdev, ring, 1024 * 1024);
8829         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8830         if (r)
8831                 return r;
8832
8833         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8834         ring->ring_obj = NULL;
8835         r600_ring_init(rdev, ring, 1024 * 1024);
8836         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8837         if (r)
8838                 return r;
8839
8840         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8841         ring->ring_obj = NULL;
8842         r600_ring_init(rdev, ring, 256 * 1024);
8843
8844         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8845         ring->ring_obj = NULL;
8846         r600_ring_init(rdev, ring, 256 * 1024);
8847
8848         r = radeon_uvd_init(rdev);
8849         if (!r) {
8850                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8851                 ring->ring_obj = NULL;
8852                 r600_ring_init(rdev, ring, 4096);
8853         }
8854
8855         r = radeon_vce_init(rdev);
8856         if (!r) {
8857                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8858                 ring->ring_obj = NULL;
8859                 r600_ring_init(rdev, ring, 4096);
8860
8861                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8862                 ring->ring_obj = NULL;
8863                 r600_ring_init(rdev, ring, 4096);
8864         }
8865
8866         rdev->ih.ring_obj = NULL;
8867         r600_ih_ring_init(rdev, 64 * 1024);
8868
8869         r = r600_pcie_gart_init(rdev);
8870         if (r)
8871                 return r;
8872
8873         rdev->accel_working = true;
8874         r = cik_startup(rdev);
8875         if (r) {
8876                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8877                 cik_cp_fini(rdev);
8878                 cik_sdma_fini(rdev);
8879                 cik_irq_fini(rdev);
8880                 sumo_rlc_fini(rdev);
8881                 cik_mec_fini(rdev);
8882                 radeon_wb_fini(rdev);
8883                 radeon_ib_pool_fini(rdev);
8884                 radeon_vm_manager_fini(rdev);
8885                 radeon_irq_kms_fini(rdev);
8886                 cik_pcie_gart_fini(rdev);
8887                 rdev->accel_working = false;
8888         }
8889
8890         /* Don't start up if the MC ucode is missing.
8891          * The default clocks and voltages before the MC ucode
8892          * is loaded are not sufficient for advanced operations.
8893          */
8894         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8895                 DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8896                 return -EINVAL;
8897         }
8898
8899         return 0;
8900 }
8901
8902 /**
8903  * cik_fini - asic specific driver and hw fini
8904  *
8905  * @rdev: radeon_device pointer
8906  *
8907  * Tear down the asic specific driver variables and program the hw
8908  * to an idle state (CIK).
8909  * Called at driver unload.
8910  */
8911 void cik_fini(struct radeon_device *rdev)
8912 {
8913         radeon_pm_fini(rdev);
8914         cik_cp_fini(rdev);
8915         cik_sdma_fini(rdev);
8916         cik_fini_pg(rdev);
8917         cik_fini_cg(rdev);
8918         cik_irq_fini(rdev);
8919         sumo_rlc_fini(rdev);
8920         cik_mec_fini(rdev);
8921         radeon_wb_fini(rdev);
8922         radeon_vm_manager_fini(rdev);
8923         radeon_ib_pool_fini(rdev);
8924         radeon_irq_kms_fini(rdev);
8925         uvd_v1_0_fini(rdev);
8926         radeon_uvd_fini(rdev);
8927         radeon_vce_fini(rdev);
8928         cik_pcie_gart_fini(rdev);
8929         r600_vram_scratch_fini(rdev);
8930         radeon_gem_fini(rdev);
8931         radeon_fence_driver_fini(rdev);
8932         radeon_bo_fini(rdev);
8933         radeon_atombios_fini(rdev);
8934         kfree(rdev->bios);
8935         rdev->bios = NULL;
8936 }
8937
8938 void dce8_program_fmt(struct drm_encoder *encoder)
8939 {
8940         struct drm_device *dev = encoder->dev;
8941         struct radeon_device *rdev = dev->dev_private;
8942         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8943         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8944         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8945         int bpc = 0;
8946         u32 tmp = 0;
8947         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8948
8949         if (connector) {
8950                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8951                 bpc = radeon_get_monitor_bpc(connector);
8952                 dither = radeon_connector->dither;
8953         }
8954
8955         /* LVDS/eDP FMT is set up by atom */
8956         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8957                 return;
8958
8959         /* not needed for analog */
8960         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8961             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8962                 return;
8963
8964         if (bpc == 0)
8965                 return;
8966
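        /* The FMT_*_DEPTH field encodes the target depth: 0 = 6 bpc,
         * 1 = 8 bpc, 2 = 10 bpc; other depths leave tmp at 0 (no
         * truncation or dithering).
         */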
8967         switch (bpc) {
8968         case 6:
8969                 if (dither == RADEON_FMT_DITHER_ENABLE)
8970                         /* XXX sort out optimal dither settings */
8971                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8972                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8973                 else
8974                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8975                 break;
8976         case 8:
8977                 if (dither == RADEON_FMT_DITHER_ENABLE)
8978                         /* XXX sort out optimal dither settings */
8979                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8980                                 FMT_RGB_RANDOM_ENABLE |
8981                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8982                 else
8983                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8984                 break;
8985         case 10:
8986                 if (dither == RADEON_FMT_DITHER_ENABLE)
8987                         /* XXX sort out optimal dither settings */
8988                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8989                                 FMT_RGB_RANDOM_ENABLE |
8990                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8991                 else
8992                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8993                 break;
8994         default:
8995                 /* not needed */
8996                 break;
8997         }
8998
8999         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9000 }
9001
9002 /* display watermark setup */
9003 /**
9004  * dce8_line_buffer_adjust - Set up the line buffer
9005  *
9006  * @rdev: radeon_device pointer
9007  * @radeon_crtc: the selected display controller
9008  * @mode: the current display mode on the selected display
9009  * controller
9010  *
9011  * Set up the line buffer allocation for
9012  * the selected display controller (CIK).
9013  * Returns the line buffer size in pixels.
9014  */
9015 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9016                                    struct radeon_crtc *radeon_crtc,
9017                                    struct drm_display_mode *mode)
9018 {
9019         u32 tmp, buffer_alloc, i;
9020         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9021         /*
9022          * Line Buffer Setup
9023          * There are 6 line buffers, one for each display controller.
9024          * There are 3 partitions per LB. Select the number of partitions
9025          * to enable based on the display width.  For display widths larger
9026          * than 4096, you need to use 2 display controllers and combine
9027          * them using the stereo blender.
9028          */
9029         if (radeon_crtc->base.enabled && mode) {
9030                 if (mode->crtc_hdisplay < 1920) {
9031                         tmp = 1;
9032                         buffer_alloc = 2;
9033                 } else if (mode->crtc_hdisplay < 2560) {
9034                         tmp = 2;
9035                         buffer_alloc = 2;
9036                 } else if (mode->crtc_hdisplay < 4096) {
9037                         tmp = 0;
9038                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9039                 } else {
9040                         DRM_DEBUG_KMS("Mode too big for LB!\n");
9041                         tmp = 0;
9042                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9043                 }
9044         } else {
9045                 tmp = 1;
9046                 buffer_alloc = 0;
9047         }
9048
9049         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9050                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9051
9052         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9053                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9054         for (i = 0; i < rdev->usec_timeout; i++) {
9055                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9056                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
9057                         break;
9058                 udelay(1);
9059         }
9060
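        /* Report the usable line buffer width, in pixels, back to the
         * watermark code: tmp == 1 -> 1920 * 2, tmp == 2 -> 2560 * 2,
         * tmp == 0 -> 4096 * 2.
         */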
9061         if (radeon_crtc->base.enabled && mode) {
9062                 switch (tmp) {
9063                 case 0:
9064                 default:
9065                         return 4096 * 2;
9066                 case 1:
9067                         return 1920 * 2;
9068                 case 2:
9069                         return 2560 * 2;
9070                 }
9071         }
9072
9073         /* controller not enabled, so no lb used */
9074         return 0;
9075 }
9076
9077 /**
9078  * cik_get_number_of_dram_channels - get the number of dram channels
9079  *
9080  * @rdev: radeon_device pointer
9081  *
9082  * Look up the number of video ram channels (CIK).
9083  * Used for display watermark bandwidth calculations
9084  * Returns the number of dram channels
9085  */
9086 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9087 {
9088         u32 tmp = RREG32(MC_SHARED_CHMAP);
9089
9090         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9091         case 0:
9092         default:
9093                 return 1;
9094         case 1:
9095                 return 2;
9096         case 2:
9097                 return 4;
9098         case 3:
9099                 return 8;
9100         case 4:
9101                 return 3;
9102         case 5:
9103                 return 6;
9104         case 6:
9105                 return 10;
9106         case 7:
9107                 return 12;
9108         case 8:
9109                 return 16;
9110         }
9111 }
9112
9113 struct dce8_wm_params {
9114         u32 dram_channels; /* number of dram channels */
9115         u32 yclk;          /* bandwidth per dram data pin in kHz */
9116         u32 sclk;          /* engine clock in kHz */
9117         u32 disp_clk;      /* display clock in kHz */
9118         u32 src_width;     /* viewport width */
9119         u32 active_time;   /* active display time in ns */
9120         u32 blank_time;    /* blank time in ns */
9121         bool interlaced;    /* mode is interlaced */
9122         fixed20_12 vsc;    /* vertical scale ratio */
9123         u32 num_heads;     /* number of active crtcs */
9124         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9125         u32 lb_size;       /* line buffer allocated to pipe */
9126         u32 vtaps;         /* vertical scaler taps */
9127 };
9128
9129 /**
9130  * dce8_dram_bandwidth - get the dram bandwidth
9131  *
9132  * @wm: watermark calculation data
9133  *
9134  * Calculate the raw dram bandwidth (CIK).
9135  * Used for display watermark bandwidth calculations
9136  * Returns the dram bandwidth in MBytes/s
9137  */
9138 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9139 {
9140         /* Calculate raw DRAM Bandwidth */
9141         fixed20_12 dram_efficiency; /* 0.7 */
9142         fixed20_12 yclk, dram_channels, bandwidth;
9143         fixed20_12 a;
9144
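        /* Fixed point evaluation of:
         *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7
         * i.e. the per-pin rate in MHz times 4-byte channels times the
         * assumed 0.7 DRAM efficiency, giving MBytes/s.
         */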
9145         a.full = dfixed_const(1000);
9146         yclk.full = dfixed_const(wm->yclk);
9147         yclk.full = dfixed_div(yclk, a);
9148         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9149         a.full = dfixed_const(10);
9150         dram_efficiency.full = dfixed_const(7);
9151         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9152         bandwidth.full = dfixed_mul(dram_channels, yclk);
9153         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9154
9155         return dfixed_trunc(bandwidth);
9156 }
9157
9158 /**
9159  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9160  *
9161  * @wm: watermark calculation data
9162  *
9163  * Calculate the dram bandwidth used for display (CIK).
9164  * Used for display watermark bandwidth calculations
9165  * Returns the dram bandwidth for display in MBytes/s
9166  */
9167 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9168 {
9169         /* Calculate DRAM Bandwidth and the part allocated to display. */
9170         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9171         fixed20_12 yclk, dram_channels, bandwidth;
9172         fixed20_12 a;
9173
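        /* Same raw DRAM bandwidth formula as above, but scaled by the
         * worst case 0.3 display allocation instead of the 0.7 efficiency:
         *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.3
         */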
9174         a.full = dfixed_const(1000);
9175         yclk.full = dfixed_const(wm->yclk);
9176         yclk.full = dfixed_div(yclk, a);
9177         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9178         a.full = dfixed_const(10);
9179         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9180         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9181         bandwidth.full = dfixed_mul(dram_channels, yclk);
9182         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9183
9184         return dfixed_trunc(bandwidth);
9185 }
9186
9187 /**
9188  * dce8_data_return_bandwidth - get the data return bandwidth
9189  *
9190  * @wm: watermark calculation data
9191  *
9192  * Calculate the data return bandwidth used for display (CIK).
9193  * Used for display watermark bandwidth calculations
9194  * Returns the data return bandwidth in MBytes/s
9195  */
9196 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9197 {
9198         /* Calculate the display Data return Bandwidth */
9199         fixed20_12 return_efficiency; /* 0.8 */
9200         fixed20_12 sclk, bandwidth;
9201         fixed20_12 a;
9202
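        /* bandwidth = (sclk / 1000) * 32 * 0.8, i.e. the engine clock in
         * MHz times 32 times the assumed 0.8 return efficiency, in MBytes/s.
         */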
9203         a.full = dfixed_const(1000);
9204         sclk.full = dfixed_const(wm->sclk);
9205         sclk.full = dfixed_div(sclk, a);
9206         a.full = dfixed_const(10);
9207         return_efficiency.full = dfixed_const(8);
9208         return_efficiency.full = dfixed_div(return_efficiency, a);
9209         a.full = dfixed_const(32);
9210         bandwidth.full = dfixed_mul(a, sclk);
9211         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9212
9213         return dfixed_trunc(bandwidth);
9214 }
9215
9216 /**
9217  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9218  *
9219  * @wm: watermark calculation data
9220  *
9221  * Calculate the dmif bandwidth used for display (CIK).
9222  * Used for display watermark bandwidth calculations
9223  * Returns the dmif bandwidth in MBytes/s
9224  */
9225 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9226 {
9227         /* Calculate the DMIF Request Bandwidth */
9228         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9229         fixed20_12 disp_clk, bandwidth;
9230         fixed20_12 a, b;
9231
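        /* bandwidth = (disp_clk / 1000) * 32 * 0.8, analogous to the data
         * return bandwidth but based on the display clock.
         */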
9232         a.full = dfixed_const(1000);
9233         disp_clk.full = dfixed_const(wm->disp_clk);
9234         disp_clk.full = dfixed_div(disp_clk, a);
9235         a.full = dfixed_const(32);
9236         b.full = dfixed_mul(a, disp_clk);
9237
9238         a.full = dfixed_const(10);
9239         disp_clk_request_efficiency.full = dfixed_const(8);
9240         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9241
9242         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9243
9244         return dfixed_trunc(bandwidth);
9245 }
9246
9247 /**
9248  * dce8_available_bandwidth - get the min available bandwidth
9249  *
9250  * @wm: watermark calculation data
9251  *
9252  * Calculate the min available bandwidth used for display (CIK).
9253  * Used for display watermark bandwidth calculations
9254  * Returns the min available bandwidth in MBytes/s
9255  */
9256 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9257 {
9258         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9259         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9260         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9261         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9262
9263         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9264 }
9265
9266 /**
9267  * dce8_average_bandwidth - get the average available bandwidth
9268  *
9269  * @wm: watermark calculation data
9270  *
9271  * Calculate the average available bandwidth used for display (CIK).
9272  * Used for display watermark bandwidth calculations
9273  * Returns the average available bandwidth in MBytes/s
9274  */
9275 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9276 {
9277         /* Calculate the display mode Average Bandwidth
9278          * DisplayMode should contain the source and destination dimensions,
9279          * timing, etc.
9280          */
9281         fixed20_12 bpp;
9282         fixed20_12 line_time;
9283         fixed20_12 src_width;
9284         fixed20_12 bandwidth;
9285         fixed20_12 a;
9286
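        /* average bandwidth = src_width * bytes_per_pixel * vsc / line_time,
         * where line_time = (active_time + blank_time) / 1000 is in us,
         * so the result comes out in MBytes/s.
         */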
9287         a.full = dfixed_const(1000);
9288         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9289         line_time.full = dfixed_div(line_time, a);
9290         bpp.full = dfixed_const(wm->bytes_per_pixel);
9291         src_width.full = dfixed_const(wm->src_width);
9292         bandwidth.full = dfixed_mul(src_width, bpp);
9293         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9294         bandwidth.full = dfixed_div(bandwidth, line_time);
9295
9296         return dfixed_trunc(bandwidth);
9297 }
9298
9299 /**
9300  * dce8_latency_watermark - get the latency watermark
9301  *
9302  * @wm: watermark calculation data
9303  *
9304  * Calculate the latency watermark (CIK).
9305  * Used for display watermark bandwidth calculations
9306  * Returns the latency watermark in ns
9307  */
9308 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9309 {
9310         /* First calculate the latency in ns */
9311         u32 mc_latency = 2000; /* 2000 ns. */
9312         u32 available_bandwidth = dce8_available_bandwidth(wm);
9313         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9314         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9315         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9316         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9317                 (wm->num_heads * cursor_line_pair_return_time);
9318         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9319         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9320         u32 tmp, dmif_size = 12288;
9321         fixed20_12 a, b, c;
9322
9323         if (wm->num_heads == 0)
9324                 return 0;
9325
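        /* The watermark is the total latency (memory controller latency,
         * dc pipe latency and the time the other heads occupy the return
         * path), extended by however much longer the line buffer fill
         * takes than the active display time for this head.
         */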
9326         a.full = dfixed_const(2);
9327         b.full = dfixed_const(1);
9328         if ((wm->vsc.full > a.full) ||
9329             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9330             (wm->vtaps >= 5) ||
9331             ((wm->vsc.full >= a.full) && wm->interlaced))
9332                 max_src_lines_per_dst_line = 4;
9333         else
9334                 max_src_lines_per_dst_line = 2;
9335
9336         a.full = dfixed_const(available_bandwidth);
9337         b.full = dfixed_const(wm->num_heads);
9338         a.full = dfixed_div(a, b);
9339
9340         b.full = dfixed_const(mc_latency + 512);
9341         c.full = dfixed_const(wm->disp_clk);
9342         b.full = dfixed_div(b, c);
9343
9344         c.full = dfixed_const(dmif_size);
9345         b.full = dfixed_div(c, b);
9346
9347         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9348
9349         b.full = dfixed_const(1000);
9350         c.full = dfixed_const(wm->disp_clk);
9351         b.full = dfixed_div(c, b);
9352         c.full = dfixed_const(wm->bytes_per_pixel);
9353         b.full = dfixed_mul(b, c);
9354
9355         lb_fill_bw = min(tmp, dfixed_trunc(b));
9356
9357         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9358         b.full = dfixed_const(1000);
9359         c.full = dfixed_const(lb_fill_bw);
9360         b.full = dfixed_div(c, b);
9361         a.full = dfixed_div(a, b);
9362         line_fill_time = dfixed_trunc(a);
9363
9364         if (line_fill_time < wm->active_time)
9365                 return latency;
9366         else
9367                 return latency + (line_fill_time - wm->active_time);
9368
9369 }
9370
9371 /**
9372  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9373  * average and available dram bandwidth
9374  *
9375  * @wm: watermark calculation data
9376  *
9377  * Check if the display average bandwidth fits in the display
9378  * dram bandwidth (CIK).
9379  * Used for display watermark bandwidth calculations
9380  * Returns true if the display fits, false if not.
9381  */
9382 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9383 {
9384         if (dce8_average_bandwidth(wm) <=
9385             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9386                 return true;
9387         else
9388                 return false;
9389 }
9390
9391 /**
9392  * dce8_average_bandwidth_vs_available_bandwidth - check
9393  * average and available bandwidth
9394  *
9395  * @wm: watermark calculation data
9396  *
9397  * Check if the display average bandwidth fits in the display
9398  * available bandwidth (CIK).
9399  * Used for display watermark bandwidth calculations
9400  * Returns true if the display fits, false if not.
9401  */
9402 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9403 {
9404         if (dce8_average_bandwidth(wm) <=
9405             (dce8_available_bandwidth(wm) / wm->num_heads))
9406                 return true;
9407         else
9408                 return false;
9409 }
9410
9411 /**
9412  * dce8_check_latency_hiding - check latency hiding
9413  *
9414  * @wm: watermark calculation data
9415  *
9416  * Check latency hiding (CIK).
9417  * Used for display watermark bandwidth calculations
9418  * Returns true if the display fits, false if not.
9419  */
9420 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9421 {
9422         u32 lb_partitions = wm->lb_size / wm->src_width;
9423         u32 line_time = wm->active_time + wm->blank_time;
9424         u32 latency_tolerant_lines;
9425         u32 latency_hiding;
9426         fixed20_12 a;
9427
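        /* Latency can be hidden for latency_tolerant_lines line times plus
         * the blanking time: one line when downscaling (vsc > 1) or when
         * the line buffer holds too few partitions, otherwise two lines.
         */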
9428         a.full = dfixed_const(1);
9429         if (wm->vsc.full > a.full)
9430                 latency_tolerant_lines = 1;
9431         else {
9432                 if (lb_partitions <= (wm->vtaps + 1))
9433                         latency_tolerant_lines = 1;
9434                 else
9435                         latency_tolerant_lines = 2;
9436         }
9437
9438         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9439
9440         if (dce8_latency_watermark(wm) <= latency_hiding)
9441                 return true;
9442         else
9443                 return false;
9444 }
9445
9446 /**
9447  * dce8_program_watermarks - program display watermarks
9448  *
9449  * @rdev: radeon_device pointer
9450  * @radeon_crtc: the selected display controller
9451  * @lb_size: line buffer size
9452  * @num_heads: number of display controllers in use
9453  *
9454  * Calculate and program the display watermarks for the
9455  * selected display controller (CIK).
9456  */
9457 static void dce8_program_watermarks(struct radeon_device *rdev,
9458                                     struct radeon_crtc *radeon_crtc,
9459                                     u32 lb_size, u32 num_heads)
9460 {
9461         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9462         struct dce8_wm_params wm_low, wm_high;
9463         u32 pixel_period;
9464         u32 line_time = 0;
9465         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9466         u32 tmp, wm_mask;
9467
9468         if (radeon_crtc->base.enabled && num_heads && mode) {
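                /* mode->clock is in kHz, so pixel_period is the pixel time
                 * in ns; line_time (ns) is capped at 65535 before being
                 * programmed via LATENCY_HIGH_WATERMARK below.
                 */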
9469                 pixel_period = 1000000 / (u32)mode->clock;
9470                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9471
9472                 /* watermark for high clocks */
9473                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9474                     rdev->pm.dpm_enabled) {
9475                         wm_high.yclk =
9476                                 radeon_dpm_get_mclk(rdev, false) * 10;
9477                         wm_high.sclk =
9478                                 radeon_dpm_get_sclk(rdev, false) * 10;
9479                 } else {
9480                         wm_high.yclk = rdev->pm.current_mclk * 10;
9481                         wm_high.sclk = rdev->pm.current_sclk * 10;
9482                 }
9483
9484                 wm_high.disp_clk = mode->clock;
9485                 wm_high.src_width = mode->crtc_hdisplay;
9486                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9487                 wm_high.blank_time = line_time - wm_high.active_time;
9488                 wm_high.interlaced = false;
9489                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9490                         wm_high.interlaced = true;
9491                 wm_high.vsc = radeon_crtc->vsc;
9492                 wm_high.vtaps = 1;
9493                 if (radeon_crtc->rmx_type != RMX_OFF)
9494                         wm_high.vtaps = 2;
9495                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9496                 wm_high.lb_size = lb_size;
9497                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9498                 wm_high.num_heads = num_heads;
9499
9500                 /* set for high clocks */
9501                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9502
9503                 /* possibly force display priority to high */
9504                 /* should really do this at mode validation time... */
9505                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9506                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9507                     !dce8_check_latency_hiding(&wm_high) ||
9508                     (rdev->disp_priority == 2)) {
9509                         DRM_DEBUG_KMS("force priority to high\n");
9510                 }
9511
9512                 /* watermark for low clocks */
9513                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9514                     rdev->pm.dpm_enabled) {
9515                         wm_low.yclk =
9516                                 radeon_dpm_get_mclk(rdev, true) * 10;
9517                         wm_low.sclk =
9518                                 radeon_dpm_get_sclk(rdev, true) * 10;
9519                 } else {
9520                         wm_low.yclk = rdev->pm.current_mclk * 10;
9521                         wm_low.sclk = rdev->pm.current_sclk * 10;
9522                 }
9523
9524                 wm_low.disp_clk = mode->clock;
9525                 wm_low.src_width = mode->crtc_hdisplay;
9526                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9527                 wm_low.blank_time = line_time - wm_low.active_time;
9528                 wm_low.interlaced = false;
9529                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9530                         wm_low.interlaced = true;
9531                 wm_low.vsc = radeon_crtc->vsc;
9532                 wm_low.vtaps = 1;
9533                 if (radeon_crtc->rmx_type != RMX_OFF)
9534                         wm_low.vtaps = 2;
9535                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9536                 wm_low.lb_size = lb_size;
9537                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9538                 wm_low.num_heads = num_heads;
9539
9540                 /* set for low clocks */
9541                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9542
9543                 /* possibly force display priority to high */
9544                 /* should really do this at mode validation time... */
9545                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9546                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9547                     !dce8_check_latency_hiding(&wm_low) ||
9548                     (rdev->disp_priority == 2)) {
9549                         DRM_DEBUG_KMS("force priority to high\n");
9550                 }
9551         }
9552
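        /* LATENCY_WATERMARK_MASK selects which watermark set the following
         * DPG_PIPE_LATENCY_CONTROL write programs: set A gets the high
         * clock values, set B the low clock values, then the original
         * selection is restored.
         */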
9553         /* select wm A */
9554         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9555         tmp = wm_mask;
9556         tmp &= ~LATENCY_WATERMARK_MASK(3);
9557         tmp |= LATENCY_WATERMARK_MASK(1);
9558         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9559         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9560                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9561                 LATENCY_HIGH_WATERMARK(line_time)));
9562         /* select wm B */
9563         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9564         tmp &= ~LATENCY_WATERMARK_MASK(3);
9565         tmp |= LATENCY_WATERMARK_MASK(2);
9566         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9567         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9568                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9569                 LATENCY_HIGH_WATERMARK(line_time)));
9570         /* restore original selection */
9571         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9572
9573         /* save values for DPM */
9574         radeon_crtc->line_time = line_time;
9575         radeon_crtc->wm_high = latency_watermark_a;
9576         radeon_crtc->wm_low = latency_watermark_b;
9577 }
9578
9579 /**
9580  * dce8_bandwidth_update - program display watermarks
9581  *
9582  * @rdev: radeon_device pointer
9583  *
9584  * Calculate and program the display watermarks and line
9585  * buffer allocation (CIK).
9586  */
9587 void dce8_bandwidth_update(struct radeon_device *rdev)
9588 {
9589         struct drm_display_mode *mode = NULL;
9590         u32 num_heads = 0, lb_size;
9591         int i;
9592
9593         if (!rdev->mode_info.mode_config_initialized)
9594                 return;
9595
9596         radeon_update_display_priority(rdev);
9597
9598         for (i = 0; i < rdev->num_crtc; i++) {
9599                 if (rdev->mode_info.crtcs[i]->base.enabled)
9600                         num_heads++;
9601         }
9602         for (i = 0; i < rdev->num_crtc; i++) {
9603                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9604                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9605                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9606         }
9607 }
9608
9609 /**
9610  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9611  *
9612  * @rdev: radeon_device pointer
9613  *
9614  * Fetches a GPU clock counter snapshot (CIK).
9615  * Returns the 64 bit clock counter snapshot.
9616  */
9617 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9618 {
9619         uint64_t clock;
9620
9621         mutex_lock(&rdev->gpu_clock_mutex);
9622         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9623         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9624                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9625         mutex_unlock(&rdev->gpu_clock_mutex);
9626         return clock;
9627 }
9628
9629 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9630                               u32 cntl_reg, u32 status_reg)
9631 {
9632         int r, i;
9633         struct atom_clock_dividers dividers;
9634         uint32_t tmp;
9635
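        /* Look up the post divider for the requested clock via atom,
         * program it into cntl_reg, then poll status_reg (up to
         * 100 * 10 ms) for the divider update to take effect.
         */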
9636         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9637                                            clock, false, &dividers);
9638         if (r)
9639                 return r;
9640
9641         tmp = RREG32_SMC(cntl_reg);
9642         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9643         tmp |= dividers.post_divider;
9644         WREG32_SMC(cntl_reg, tmp);
9645
9646         for (i = 0; i < 100; i++) {
9647                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9648                         break;
9649                 mdelay(10);
9650         }
9651         if (i == 100)
9652                 return -ETIMEDOUT;
9653
9654         return 0;
9655 }
9656
9657 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9658 {
9659         int r = 0;
9660
9661         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9662         if (r)
9663                 return r;
9664
9665         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9666         return r;
9667 }
9668
9669 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9670 {
9671         int r, i;
9672         struct atom_clock_dividers dividers;
9673         u32 tmp;
9674
9675         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9676                                            ecclk, false, &dividers);
9677         if (r)
9678                 return r;
9679
9680         for (i = 0; i < 100; i++) {
9681                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9682                         break;
9683                 mdelay(10);
9684         }
9685         if (i == 100)
9686                 return -ETIMEDOUT;
9687
9688         tmp = RREG32_SMC(CG_ECLK_CNTL);
9689         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9690         tmp |= dividers.post_divider;
9691         WREG32_SMC(CG_ECLK_CNTL, tmp);
9692
9693         for (i = 0; i < 100; i++) {
9694                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9695                         break;
9696                 mdelay(10);
9697         }
9698         if (i == 100)
9699                 return -ETIMEDOUT;
9700
9701         return 0;
9702 }
9703
9704 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9705 {
9706         struct pci_dev *root = rdev->pdev->bus->self;
9707         int bridge_pos, gpu_pos;
9708         u32 speed_cntl, mask, current_data_rate;
9709         int ret, i;
9710         u16 tmp16;
9711
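        /* Nothing to do for devices on the root bus, for IGPs, for
         * non-PCIE parts, or when the user disabled this with
         * radeon.pcie_gen2=0.
         */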
9712         if (pci_is_root_bus(rdev->pdev->bus))
9713                 return;
9714
9715         if (radeon_pcie_gen2 == 0)
9716                 return;
9717
9718         if (rdev->flags & RADEON_IS_IGP)
9719                 return;
9720
9721         if (!(rdev->flags & RADEON_IS_PCIE))
9722                 return;
9723
9724         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9725         if (ret != 0)
9726                 return;
9727
9728         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9729                 return;
9730
9731         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9732         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9733                 LC_CURRENT_DATA_RATE_SHIFT;
9734         if (mask & DRM_PCIE_SPEED_80) {
9735                 if (current_data_rate == 2) {
9736                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9737                         return;
9738                 }
9739                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9740         } else if (mask & DRM_PCIE_SPEED_50) {
9741                 if (current_data_rate == 1) {
9742                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9743                         return;
9744                 }
9745                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9746         }
9747
9748         bridge_pos = pci_pcie_cap(root);
9749         if (!bridge_pos)
9750                 return;
9751
9752         gpu_pos = pci_pcie_cap(rdev->pdev);
9753         if (!gpu_pos)
9754                 return;
9755
9756         if (mask & DRM_PCIE_SPEED_80) {
9757                 /* re-try equalization if gen3 is not already enabled */
9758                 if (current_data_rate != 2) {
9759                         u16 bridge_cfg, gpu_cfg;
9760                         u16 bridge_cfg2, gpu_cfg2;
9761                         u32 max_lw, current_lw, tmp;
9762
9763                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9764                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9765
9766                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9767                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9768
9769                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9770                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9771
9772                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9773                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9774                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9775
9776                         if (current_lw < max_lw) {
9777                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9778                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9779                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9780                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9781                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9782                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9783                                 }
9784                         }
9785
9786                         for (i = 0; i < 10; i++) {
9787                                 /* check status */
9788                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9789                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9790                                         break;
9791
9792                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9793                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9794
9795                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9796                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9797
9798                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9799                                 tmp |= LC_SET_QUIESCE;
9800                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9801
9802                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9803                                 tmp |= LC_REDO_EQ;
9804                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9805
9806                                 mdelay(100);
9807
9808                                 /* linkctl */
9809                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9810                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9811                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9812                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9813
9814                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9815                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9816                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9817                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9818
9819                                 /* linkctl2 */
9820                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9821                                 tmp16 &= ~((1 << 4) | (7 << 9));
9822                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9823                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9824
9825                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9826                                 tmp16 &= ~((1 << 4) | (7 << 9));
9827                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9828                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9829
9830                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9831                                 tmp &= ~LC_SET_QUIESCE;
9832                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9833                         }
9834                 }
9835         }
9836
9837         /* set the link speed */
9838         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9839         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9840         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9841
9842         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9843         tmp16 &= ~0xf;
9844         if (mask & DRM_PCIE_SPEED_80)
9845                 tmp16 |= 3; /* gen3 */
9846         else if (mask & DRM_PCIE_SPEED_50)
9847                 tmp16 |= 2; /* gen2 */
9848         else
9849                 tmp16 |= 1; /* gen1 */
9850         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9851
9852         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9853         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9854         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9855
9856         for (i = 0; i < rdev->usec_timeout; i++) {
9857                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9858                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9859                         break;
9860                 udelay(1);
9861         }
9862 }
9863
9864 static void cik_program_aspm(struct radeon_device *rdev)
9865 {
9866         u32 data, orig;
9867         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9868         bool disable_clkreq = false;
9869
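        /* ASPM is left untouched when radeon.aspm=0, on IGPs and on
         * non-PCIE parts; otherwise the L0s/L1 inactivity timers are
         * programmed below and, when the upstream bridge advertises
         * clock PM, PLL power-down in L1/L23 is allowed as well.
         */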
9870         if (radeon_aspm == 0)
9871                 return;
9872
9873         /* XXX double check IGPs */
9874         if (rdev->flags & RADEON_IS_IGP)
9875                 return;
9876
9877         if (!(rdev->flags & RADEON_IS_PCIE))
9878                 return;
9879
9880         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9881         data &= ~LC_XMIT_N_FTS_MASK;
9882         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9883         if (orig != data)
9884                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9885
9886         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9887         data |= LC_GO_TO_RECOVERY;
9888         if (orig != data)
9889                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9890
9891         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9892         data |= P_IGNORE_EDB_ERR;
9893         if (orig != data)
9894                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9895
9896         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9897         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9898         data |= LC_PMI_TO_L1_DIS;
9899         if (!disable_l0s)
9900                 data |= LC_L0S_INACTIVITY(7);
9901
9902         if (!disable_l1) {
9903                 data |= LC_L1_INACTIVITY(7);
9904                 data &= ~LC_PMI_TO_L1_DIS;
9905                 if (orig != data)
9906                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9907
9908                 if (!disable_plloff_in_l1) {
9909                         bool clk_req_support;
9910
9911                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9912                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9913                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9914                         if (orig != data)
9915                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9916
9917                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9918                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9919                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9920                         if (orig != data)
9921                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9922
9923                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9924                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9925                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9926                         if (orig != data)
9927                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9928
9929                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9930                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9931                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9932                         if (orig != data)
9933                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9934
9935                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9936                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9937                         data |= LC_DYN_LANES_PWR_STATE(3);
9938                         if (orig != data)
9939                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9940
9941                         if (!disable_clkreq &&
9942                             !pci_is_root_bus(rdev->pdev->bus)) {
9943                                 struct pci_dev *root = rdev->pdev->bus->self;
9944                                 u32 lnkcap;
9945
9946                                 clk_req_support = false;
9947                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9948                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9949                                         clk_req_support = true;
9950                         } else {
9951                                 clk_req_support = false;
9952                         }
9953
9954                         if (clk_req_support) {
9955                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9956                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9957                                 if (orig != data)
9958                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9959
9960                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9961                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9962                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9963                                 if (orig != data)
9964                                         WREG32_SMC(THM_CLK_CNTL, data);
9965
9966                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9967                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9968                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9969                                 if (orig != data)
9970                                         WREG32_SMC(MISC_CLK_CTRL, data);
9971
9972                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9973                                 data &= ~BCLK_AS_XCLK;
9974                                 if (orig != data)
9975                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9976
9977                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9978                                 data &= ~FORCE_BIF_REFCLK_EN;
9979                                 if (orig != data)
9980                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9981
9982                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9983                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9984                                 data |= MPLL_CLKOUT_SEL(4);
9985                                 if (orig != data)
9986                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9987                         }
9988                 }
9989         } else {
9990                 if (orig != data)
9991                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9992         }
9993
9994         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9995         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9996         if (orig != data)
9997                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9998
9999         if (!disable_l0s) {
10000                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10001                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10002                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10003                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10004                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10005                                 data &= ~LC_L0S_INACTIVITY_MASK;
10006                                 if (orig != data)
10007                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10008                         }
10009                 }
10010         }
10011 }