/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

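/*
 * Firmware images requested below, one set per ASIC.  As far as the file
 * names indicate: pfp/me/ce are the command-processor microcode pieces
 * (prefetch parser, micro engine, constant engine), mec is the compute
 * micro engine, rlc the run list controller, sdma the system DMA engine,
 * mc/mc2 the memory controller and smc the system management controller.
 * Note that the KAVERI and KABINI APU lists carry no MC or SMC images.
 */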
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
                CTF_TEMP_SHIFT;

        if (temp & 0x200)
                actual_temp = 255;
        else
                actual_temp = temp & 0x1ff;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}
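
/*
 * Example of the decode above: a raw CTF_TEMP field of 0x62 (98) yields
 * 98 * 1000 = 98000 millidegrees.  Readings with bit 9 set fall outside
 * the 9-bit range and are clamped to 255 degrees (255000 millidegrees).
 */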

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = RREG32_SMC(0xC0300E0C);

        if (temp)
                actual_temp = (temp / 8) - 49;
        else
                actual_temp = 0;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}
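
/*
 * Example of the conversion above: an SMC reading of 1008 (0x3f0) maps to
 * 1008 / 8 - 49 = 77 degrees, i.e. 77000 millidegrees.  Note that the SMC
 * thermal status offset 0xC0300E0C is used directly here rather than
 * through a named register define.
 */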

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        unsigned long flags;
        u32 r;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        unsigned long flags;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

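/*
 * The PCIE port space is reached through a classic index/data pair: the
 * target offset is written to PCIE_INDEX, read back once (presumably to
 * post the write), and the payload then moves through PCIE_DATA.  The
 * pciep_idx_lock keeps concurrent callers from interleaving their
 * index/data sequences.
 */

/*
 * RLC save/restore register lists.  Each entry below packs what appears
 * to be a per-instance/broadcast selector in the upper 16 bits with a
 * register dword offset (byte offset >> 2) in the lower 16 bits, and most
 * entries are followed by a 0x00000000 placeholder for the saved value.
 * The bare counts (0x3, 0x5) appear to introduce sub-blocks that the RLC
 * handles specially.  "Spectre" and "Kalindi" are the internal names of
 * the KAVERI and KABINI graphics blocks, respectively.
 */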
156 static const u32 spectre_rlc_save_restore_register_list[] =
157 {
158         (0x0e00 << 16) | (0xc12c >> 2),
159         0x00000000,
160         (0x0e00 << 16) | (0xc140 >> 2),
161         0x00000000,
162         (0x0e00 << 16) | (0xc150 >> 2),
163         0x00000000,
164         (0x0e00 << 16) | (0xc15c >> 2),
165         0x00000000,
166         (0x0e00 << 16) | (0xc168 >> 2),
167         0x00000000,
168         (0x0e00 << 16) | (0xc170 >> 2),
169         0x00000000,
170         (0x0e00 << 16) | (0xc178 >> 2),
171         0x00000000,
172         (0x0e00 << 16) | (0xc204 >> 2),
173         0x00000000,
174         (0x0e00 << 16) | (0xc2b4 >> 2),
175         0x00000000,
176         (0x0e00 << 16) | (0xc2b8 >> 2),
177         0x00000000,
178         (0x0e00 << 16) | (0xc2bc >> 2),
179         0x00000000,
180         (0x0e00 << 16) | (0xc2c0 >> 2),
181         0x00000000,
182         (0x0e00 << 16) | (0x8228 >> 2),
183         0x00000000,
184         (0x0e00 << 16) | (0x829c >> 2),
185         0x00000000,
186         (0x0e00 << 16) | (0x869c >> 2),
187         0x00000000,
188         (0x0600 << 16) | (0x98f4 >> 2),
189         0x00000000,
190         (0x0e00 << 16) | (0x98f8 >> 2),
191         0x00000000,
192         (0x0e00 << 16) | (0x9900 >> 2),
193         0x00000000,
194         (0x0e00 << 16) | (0xc260 >> 2),
195         0x00000000,
196         (0x0e00 << 16) | (0x90e8 >> 2),
197         0x00000000,
198         (0x0e00 << 16) | (0x3c000 >> 2),
199         0x00000000,
200         (0x0e00 << 16) | (0x3c00c >> 2),
201         0x00000000,
202         (0x0e00 << 16) | (0x8c1c >> 2),
203         0x00000000,
204         (0x0e00 << 16) | (0x9700 >> 2),
205         0x00000000,
206         (0x0e00 << 16) | (0xcd20 >> 2),
207         0x00000000,
208         (0x4e00 << 16) | (0xcd20 >> 2),
209         0x00000000,
210         (0x5e00 << 16) | (0xcd20 >> 2),
211         0x00000000,
212         (0x6e00 << 16) | (0xcd20 >> 2),
213         0x00000000,
214         (0x7e00 << 16) | (0xcd20 >> 2),
215         0x00000000,
216         (0x8e00 << 16) | (0xcd20 >> 2),
217         0x00000000,
218         (0x9e00 << 16) | (0xcd20 >> 2),
219         0x00000000,
220         (0xae00 << 16) | (0xcd20 >> 2),
221         0x00000000,
222         (0xbe00 << 16) | (0xcd20 >> 2),
223         0x00000000,
224         (0x0e00 << 16) | (0x89bc >> 2),
225         0x00000000,
226         (0x0e00 << 16) | (0x8900 >> 2),
227         0x00000000,
228         0x3,
229         (0x0e00 << 16) | (0xc130 >> 2),
230         0x00000000,
231         (0x0e00 << 16) | (0xc134 >> 2),
232         0x00000000,
233         (0x0e00 << 16) | (0xc1fc >> 2),
234         0x00000000,
235         (0x0e00 << 16) | (0xc208 >> 2),
236         0x00000000,
237         (0x0e00 << 16) | (0xc264 >> 2),
238         0x00000000,
239         (0x0e00 << 16) | (0xc268 >> 2),
240         0x00000000,
241         (0x0e00 << 16) | (0xc26c >> 2),
242         0x00000000,
243         (0x0e00 << 16) | (0xc270 >> 2),
244         0x00000000,
245         (0x0e00 << 16) | (0xc274 >> 2),
246         0x00000000,
247         (0x0e00 << 16) | (0xc278 >> 2),
248         0x00000000,
249         (0x0e00 << 16) | (0xc27c >> 2),
250         0x00000000,
251         (0x0e00 << 16) | (0xc280 >> 2),
252         0x00000000,
253         (0x0e00 << 16) | (0xc284 >> 2),
254         0x00000000,
255         (0x0e00 << 16) | (0xc288 >> 2),
256         0x00000000,
257         (0x0e00 << 16) | (0xc28c >> 2),
258         0x00000000,
259         (0x0e00 << 16) | (0xc290 >> 2),
260         0x00000000,
261         (0x0e00 << 16) | (0xc294 >> 2),
262         0x00000000,
263         (0x0e00 << 16) | (0xc298 >> 2),
264         0x00000000,
265         (0x0e00 << 16) | (0xc29c >> 2),
266         0x00000000,
267         (0x0e00 << 16) | (0xc2a0 >> 2),
268         0x00000000,
269         (0x0e00 << 16) | (0xc2a4 >> 2),
270         0x00000000,
271         (0x0e00 << 16) | (0xc2a8 >> 2),
272         0x00000000,
273         (0x0e00 << 16) | (0xc2ac  >> 2),
274         0x00000000,
275         (0x0e00 << 16) | (0xc2b0 >> 2),
276         0x00000000,
277         (0x0e00 << 16) | (0x301d0 >> 2),
278         0x00000000,
279         (0x0e00 << 16) | (0x30238 >> 2),
280         0x00000000,
281         (0x0e00 << 16) | (0x30250 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0x30254 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0x30258 >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0x3025c >> 2),
288         0x00000000,
289         (0x4e00 << 16) | (0xc900 >> 2),
290         0x00000000,
291         (0x5e00 << 16) | (0xc900 >> 2),
292         0x00000000,
293         (0x6e00 << 16) | (0xc900 >> 2),
294         0x00000000,
295         (0x7e00 << 16) | (0xc900 >> 2),
296         0x00000000,
297         (0x8e00 << 16) | (0xc900 >> 2),
298         0x00000000,
299         (0x9e00 << 16) | (0xc900 >> 2),
300         0x00000000,
301         (0xae00 << 16) | (0xc900 >> 2),
302         0x00000000,
303         (0xbe00 << 16) | (0xc900 >> 2),
304         0x00000000,
305         (0x4e00 << 16) | (0xc904 >> 2),
306         0x00000000,
307         (0x5e00 << 16) | (0xc904 >> 2),
308         0x00000000,
309         (0x6e00 << 16) | (0xc904 >> 2),
310         0x00000000,
311         (0x7e00 << 16) | (0xc904 >> 2),
312         0x00000000,
313         (0x8e00 << 16) | (0xc904 >> 2),
314         0x00000000,
315         (0x9e00 << 16) | (0xc904 >> 2),
316         0x00000000,
317         (0xae00 << 16) | (0xc904 >> 2),
318         0x00000000,
319         (0xbe00 << 16) | (0xc904 >> 2),
320         0x00000000,
321         (0x4e00 << 16) | (0xc908 >> 2),
322         0x00000000,
323         (0x5e00 << 16) | (0xc908 >> 2),
324         0x00000000,
325         (0x6e00 << 16) | (0xc908 >> 2),
326         0x00000000,
327         (0x7e00 << 16) | (0xc908 >> 2),
328         0x00000000,
329         (0x8e00 << 16) | (0xc908 >> 2),
330         0x00000000,
331         (0x9e00 << 16) | (0xc908 >> 2),
332         0x00000000,
333         (0xae00 << 16) | (0xc908 >> 2),
334         0x00000000,
335         (0xbe00 << 16) | (0xc908 >> 2),
336         0x00000000,
337         (0x4e00 << 16) | (0xc90c >> 2),
338         0x00000000,
339         (0x5e00 << 16) | (0xc90c >> 2),
340         0x00000000,
341         (0x6e00 << 16) | (0xc90c >> 2),
342         0x00000000,
343         (0x7e00 << 16) | (0xc90c >> 2),
344         0x00000000,
345         (0x8e00 << 16) | (0xc90c >> 2),
346         0x00000000,
347         (0x9e00 << 16) | (0xc90c >> 2),
348         0x00000000,
349         (0xae00 << 16) | (0xc90c >> 2),
350         0x00000000,
351         (0xbe00 << 16) | (0xc90c >> 2),
352         0x00000000,
353         (0x4e00 << 16) | (0xc910 >> 2),
354         0x00000000,
355         (0x5e00 << 16) | (0xc910 >> 2),
356         0x00000000,
357         (0x6e00 << 16) | (0xc910 >> 2),
358         0x00000000,
359         (0x7e00 << 16) | (0xc910 >> 2),
360         0x00000000,
361         (0x8e00 << 16) | (0xc910 >> 2),
362         0x00000000,
363         (0x9e00 << 16) | (0xc910 >> 2),
364         0x00000000,
365         (0xae00 << 16) | (0xc910 >> 2),
366         0x00000000,
367         (0xbe00 << 16) | (0xc910 >> 2),
368         0x00000000,
369         (0x0e00 << 16) | (0xc99c >> 2),
370         0x00000000,
371         (0x0e00 << 16) | (0x9834 >> 2),
372         0x00000000,
373         (0x0000 << 16) | (0x30f00 >> 2),
374         0x00000000,
375         (0x0001 << 16) | (0x30f00 >> 2),
376         0x00000000,
377         (0x0000 << 16) | (0x30f04 >> 2),
378         0x00000000,
379         (0x0001 << 16) | (0x30f04 >> 2),
380         0x00000000,
381         (0x0000 << 16) | (0x30f08 >> 2),
382         0x00000000,
383         (0x0001 << 16) | (0x30f08 >> 2),
384         0x00000000,
385         (0x0000 << 16) | (0x30f0c >> 2),
386         0x00000000,
387         (0x0001 << 16) | (0x30f0c >> 2),
388         0x00000000,
389         (0x0600 << 16) | (0x9b7c >> 2),
390         0x00000000,
391         (0x0e00 << 16) | (0x8a14 >> 2),
392         0x00000000,
393         (0x0e00 << 16) | (0x8a18 >> 2),
394         0x00000000,
395         (0x0600 << 16) | (0x30a00 >> 2),
396         0x00000000,
397         (0x0e00 << 16) | (0x8bf0 >> 2),
398         0x00000000,
399         (0x0e00 << 16) | (0x8bcc >> 2),
400         0x00000000,
401         (0x0e00 << 16) | (0x8b24 >> 2),
402         0x00000000,
403         (0x0e00 << 16) | (0x30a04 >> 2),
404         0x00000000,
405         (0x0600 << 16) | (0x30a10 >> 2),
406         0x00000000,
407         (0x0600 << 16) | (0x30a14 >> 2),
408         0x00000000,
409         (0x0600 << 16) | (0x30a18 >> 2),
410         0x00000000,
411         (0x0600 << 16) | (0x30a2c >> 2),
412         0x00000000,
413         (0x0e00 << 16) | (0xc700 >> 2),
414         0x00000000,
415         (0x0e00 << 16) | (0xc704 >> 2),
416         0x00000000,
417         (0x0e00 << 16) | (0xc708 >> 2),
418         0x00000000,
419         (0x0e00 << 16) | (0xc768 >> 2),
420         0x00000000,
421         (0x0400 << 16) | (0xc770 >> 2),
422         0x00000000,
423         (0x0400 << 16) | (0xc774 >> 2),
424         0x00000000,
425         (0x0400 << 16) | (0xc778 >> 2),
426         0x00000000,
427         (0x0400 << 16) | (0xc77c >> 2),
428         0x00000000,
429         (0x0400 << 16) | (0xc780 >> 2),
430         0x00000000,
431         (0x0400 << 16) | (0xc784 >> 2),
432         0x00000000,
433         (0x0400 << 16) | (0xc788 >> 2),
434         0x00000000,
435         (0x0400 << 16) | (0xc78c >> 2),
436         0x00000000,
437         (0x0400 << 16) | (0xc798 >> 2),
438         0x00000000,
439         (0x0400 << 16) | (0xc79c >> 2),
440         0x00000000,
441         (0x0400 << 16) | (0xc7a0 >> 2),
442         0x00000000,
443         (0x0400 << 16) | (0xc7a4 >> 2),
444         0x00000000,
445         (0x0400 << 16) | (0xc7a8 >> 2),
446         0x00000000,
447         (0x0400 << 16) | (0xc7ac >> 2),
448         0x00000000,
449         (0x0400 << 16) | (0xc7b0 >> 2),
450         0x00000000,
451         (0x0400 << 16) | (0xc7b4 >> 2),
452         0x00000000,
453         (0x0e00 << 16) | (0x9100 >> 2),
454         0x00000000,
455         (0x0e00 << 16) | (0x3c010 >> 2),
456         0x00000000,
457         (0x0e00 << 16) | (0x92a8 >> 2),
458         0x00000000,
459         (0x0e00 << 16) | (0x92ac >> 2),
460         0x00000000,
461         (0x0e00 << 16) | (0x92b4 >> 2),
462         0x00000000,
463         (0x0e00 << 16) | (0x92b8 >> 2),
464         0x00000000,
465         (0x0e00 << 16) | (0x92bc >> 2),
466         0x00000000,
467         (0x0e00 << 16) | (0x92c0 >> 2),
468         0x00000000,
469         (0x0e00 << 16) | (0x92c4 >> 2),
470         0x00000000,
471         (0x0e00 << 16) | (0x92c8 >> 2),
472         0x00000000,
473         (0x0e00 << 16) | (0x92cc >> 2),
474         0x00000000,
475         (0x0e00 << 16) | (0x92d0 >> 2),
476         0x00000000,
477         (0x0e00 << 16) | (0x8c00 >> 2),
478         0x00000000,
479         (0x0e00 << 16) | (0x8c04 >> 2),
480         0x00000000,
481         (0x0e00 << 16) | (0x8c20 >> 2),
482         0x00000000,
483         (0x0e00 << 16) | (0x8c38 >> 2),
484         0x00000000,
485         (0x0e00 << 16) | (0x8c3c >> 2),
486         0x00000000,
487         (0x0e00 << 16) | (0xae00 >> 2),
488         0x00000000,
489         (0x0e00 << 16) | (0x9604 >> 2),
490         0x00000000,
491         (0x0e00 << 16) | (0xac08 >> 2),
492         0x00000000,
493         (0x0e00 << 16) | (0xac0c >> 2),
494         0x00000000,
495         (0x0e00 << 16) | (0xac10 >> 2),
496         0x00000000,
497         (0x0e00 << 16) | (0xac14 >> 2),
498         0x00000000,
499         (0x0e00 << 16) | (0xac58 >> 2),
500         0x00000000,
501         (0x0e00 << 16) | (0xac68 >> 2),
502         0x00000000,
503         (0x0e00 << 16) | (0xac6c >> 2),
504         0x00000000,
505         (0x0e00 << 16) | (0xac70 >> 2),
506         0x00000000,
507         (0x0e00 << 16) | (0xac74 >> 2),
508         0x00000000,
509         (0x0e00 << 16) | (0xac78 >> 2),
510         0x00000000,
511         (0x0e00 << 16) | (0xac7c >> 2),
512         0x00000000,
513         (0x0e00 << 16) | (0xac80 >> 2),
514         0x00000000,
515         (0x0e00 << 16) | (0xac84 >> 2),
516         0x00000000,
517         (0x0e00 << 16) | (0xac88 >> 2),
518         0x00000000,
519         (0x0e00 << 16) | (0xac8c >> 2),
520         0x00000000,
521         (0x0e00 << 16) | (0x970c >> 2),
522         0x00000000,
523         (0x0e00 << 16) | (0x9714 >> 2),
524         0x00000000,
525         (0x0e00 << 16) | (0x9718 >> 2),
526         0x00000000,
527         (0x0e00 << 16) | (0x971c >> 2),
528         0x00000000,
529         (0x0e00 << 16) | (0x31068 >> 2),
530         0x00000000,
531         (0x4e00 << 16) | (0x31068 >> 2),
532         0x00000000,
533         (0x5e00 << 16) | (0x31068 >> 2),
534         0x00000000,
535         (0x6e00 << 16) | (0x31068 >> 2),
536         0x00000000,
537         (0x7e00 << 16) | (0x31068 >> 2),
538         0x00000000,
539         (0x8e00 << 16) | (0x31068 >> 2),
540         0x00000000,
541         (0x9e00 << 16) | (0x31068 >> 2),
542         0x00000000,
543         (0xae00 << 16) | (0x31068 >> 2),
544         0x00000000,
545         (0xbe00 << 16) | (0x31068 >> 2),
546         0x00000000,
547         (0x0e00 << 16) | (0xcd10 >> 2),
548         0x00000000,
549         (0x0e00 << 16) | (0xcd14 >> 2),
550         0x00000000,
551         (0x0e00 << 16) | (0x88b0 >> 2),
552         0x00000000,
553         (0x0e00 << 16) | (0x88b4 >> 2),
554         0x00000000,
555         (0x0e00 << 16) | (0x88b8 >> 2),
556         0x00000000,
557         (0x0e00 << 16) | (0x88bc >> 2),
558         0x00000000,
559         (0x0400 << 16) | (0x89c0 >> 2),
560         0x00000000,
561         (0x0e00 << 16) | (0x88c4 >> 2),
562         0x00000000,
563         (0x0e00 << 16) | (0x88c8 >> 2),
564         0x00000000,
565         (0x0e00 << 16) | (0x88d0 >> 2),
566         0x00000000,
567         (0x0e00 << 16) | (0x88d4 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0x88d8 >> 2),
570         0x00000000,
571         (0x0e00 << 16) | (0x8980 >> 2),
572         0x00000000,
573         (0x0e00 << 16) | (0x30938 >> 2),
574         0x00000000,
575         (0x0e00 << 16) | (0x3093c >> 2),
576         0x00000000,
577         (0x0e00 << 16) | (0x30940 >> 2),
578         0x00000000,
579         (0x0e00 << 16) | (0x89a0 >> 2),
580         0x00000000,
581         (0x0e00 << 16) | (0x30900 >> 2),
582         0x00000000,
583         (0x0e00 << 16) | (0x30904 >> 2),
584         0x00000000,
585         (0x0e00 << 16) | (0x89b4 >> 2),
586         0x00000000,
587         (0x0e00 << 16) | (0x3c210 >> 2),
588         0x00000000,
589         (0x0e00 << 16) | (0x3c214 >> 2),
590         0x00000000,
591         (0x0e00 << 16) | (0x3c218 >> 2),
592         0x00000000,
593         (0x0e00 << 16) | (0x8904 >> 2),
594         0x00000000,
595         0x5,
596         (0x0e00 << 16) | (0x8c28 >> 2),
597         (0x0e00 << 16) | (0x8c2c >> 2),
598         (0x0e00 << 16) | (0x8c30 >> 2),
599         (0x0e00 << 16) | (0x8c34 >> 2),
600         (0x0e00 << 16) | (0x9600 >> 2),
601 };
602
603 static const u32 kalindi_rlc_save_restore_register_list[] =
604 {
605         (0x0e00 << 16) | (0xc12c >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0xc140 >> 2),
608         0x00000000,
609         (0x0e00 << 16) | (0xc150 >> 2),
610         0x00000000,
611         (0x0e00 << 16) | (0xc15c >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0xc168 >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0xc170 >> 2),
616         0x00000000,
617         (0x0e00 << 16) | (0xc204 >> 2),
618         0x00000000,
619         (0x0e00 << 16) | (0xc2b4 >> 2),
620         0x00000000,
621         (0x0e00 << 16) | (0xc2b8 >> 2),
622         0x00000000,
623         (0x0e00 << 16) | (0xc2bc >> 2),
624         0x00000000,
625         (0x0e00 << 16) | (0xc2c0 >> 2),
626         0x00000000,
627         (0x0e00 << 16) | (0x8228 >> 2),
628         0x00000000,
629         (0x0e00 << 16) | (0x829c >> 2),
630         0x00000000,
631         (0x0e00 << 16) | (0x869c >> 2),
632         0x00000000,
633         (0x0600 << 16) | (0x98f4 >> 2),
634         0x00000000,
635         (0x0e00 << 16) | (0x98f8 >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x9900 >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0xc260 >> 2),
640         0x00000000,
641         (0x0e00 << 16) | (0x90e8 >> 2),
642         0x00000000,
643         (0x0e00 << 16) | (0x3c000 >> 2),
644         0x00000000,
645         (0x0e00 << 16) | (0x3c00c >> 2),
646         0x00000000,
647         (0x0e00 << 16) | (0x8c1c >> 2),
648         0x00000000,
649         (0x0e00 << 16) | (0x9700 >> 2),
650         0x00000000,
651         (0x0e00 << 16) | (0xcd20 >> 2),
652         0x00000000,
653         (0x4e00 << 16) | (0xcd20 >> 2),
654         0x00000000,
655         (0x5e00 << 16) | (0xcd20 >> 2),
656         0x00000000,
657         (0x6e00 << 16) | (0xcd20 >> 2),
658         0x00000000,
659         (0x7e00 << 16) | (0xcd20 >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0x89bc >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0x8900 >> 2),
664         0x00000000,
665         0x3,
666         (0x0e00 << 16) | (0xc130 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0xc134 >> 2),
669         0x00000000,
670         (0x0e00 << 16) | (0xc1fc >> 2),
671         0x00000000,
672         (0x0e00 << 16) | (0xc208 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0xc264 >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0xc268 >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0xc26c >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0xc270 >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0xc274 >> 2),
683         0x00000000,
684         (0x0e00 << 16) | (0xc28c >> 2),
685         0x00000000,
686         (0x0e00 << 16) | (0xc290 >> 2),
687         0x00000000,
688         (0x0e00 << 16) | (0xc294 >> 2),
689         0x00000000,
690         (0x0e00 << 16) | (0xc298 >> 2),
691         0x00000000,
692         (0x0e00 << 16) | (0xc2a0 >> 2),
693         0x00000000,
694         (0x0e00 << 16) | (0xc2a4 >> 2),
695         0x00000000,
696         (0x0e00 << 16) | (0xc2a8 >> 2),
697         0x00000000,
698         (0x0e00 << 16) | (0xc2ac >> 2),
699         0x00000000,
700         (0x0e00 << 16) | (0x301d0 >> 2),
701         0x00000000,
702         (0x0e00 << 16) | (0x30238 >> 2),
703         0x00000000,
704         (0x0e00 << 16) | (0x30250 >> 2),
705         0x00000000,
706         (0x0e00 << 16) | (0x30254 >> 2),
707         0x00000000,
708         (0x0e00 << 16) | (0x30258 >> 2),
709         0x00000000,
710         (0x0e00 << 16) | (0x3025c >> 2),
711         0x00000000,
712         (0x4e00 << 16) | (0xc900 >> 2),
713         0x00000000,
714         (0x5e00 << 16) | (0xc900 >> 2),
715         0x00000000,
716         (0x6e00 << 16) | (0xc900 >> 2),
717         0x00000000,
718         (0x7e00 << 16) | (0xc900 >> 2),
719         0x00000000,
720         (0x4e00 << 16) | (0xc904 >> 2),
721         0x00000000,
722         (0x5e00 << 16) | (0xc904 >> 2),
723         0x00000000,
724         (0x6e00 << 16) | (0xc904 >> 2),
725         0x00000000,
726         (0x7e00 << 16) | (0xc904 >> 2),
727         0x00000000,
728         (0x4e00 << 16) | (0xc908 >> 2),
729         0x00000000,
730         (0x5e00 << 16) | (0xc908 >> 2),
731         0x00000000,
732         (0x6e00 << 16) | (0xc908 >> 2),
733         0x00000000,
734         (0x7e00 << 16) | (0xc908 >> 2),
735         0x00000000,
736         (0x4e00 << 16) | (0xc90c >> 2),
737         0x00000000,
738         (0x5e00 << 16) | (0xc90c >> 2),
739         0x00000000,
740         (0x6e00 << 16) | (0xc90c >> 2),
741         0x00000000,
742         (0x7e00 << 16) | (0xc90c >> 2),
743         0x00000000,
744         (0x4e00 << 16) | (0xc910 >> 2),
745         0x00000000,
746         (0x5e00 << 16) | (0xc910 >> 2),
747         0x00000000,
748         (0x6e00 << 16) | (0xc910 >> 2),
749         0x00000000,
750         (0x7e00 << 16) | (0xc910 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0xc99c >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x9834 >> 2),
755         0x00000000,
756         (0x0000 << 16) | (0x30f00 >> 2),
757         0x00000000,
758         (0x0000 << 16) | (0x30f04 >> 2),
759         0x00000000,
760         (0x0000 << 16) | (0x30f08 >> 2),
761         0x00000000,
762         (0x0000 << 16) | (0x30f0c >> 2),
763         0x00000000,
764         (0x0600 << 16) | (0x9b7c >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0x8a14 >> 2),
767         0x00000000,
768         (0x0e00 << 16) | (0x8a18 >> 2),
769         0x00000000,
770         (0x0600 << 16) | (0x30a00 >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0x8bf0 >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x8bcc >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x8b24 >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0x30a04 >> 2),
779         0x00000000,
780         (0x0600 << 16) | (0x30a10 >> 2),
781         0x00000000,
782         (0x0600 << 16) | (0x30a14 >> 2),
783         0x00000000,
784         (0x0600 << 16) | (0x30a18 >> 2),
785         0x00000000,
786         (0x0600 << 16) | (0x30a2c >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0xc700 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0xc704 >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0xc708 >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0xc768 >> 2),
795         0x00000000,
796         (0x0400 << 16) | (0xc770 >> 2),
797         0x00000000,
798         (0x0400 << 16) | (0xc774 >> 2),
799         0x00000000,
800         (0x0400 << 16) | (0xc798 >> 2),
801         0x00000000,
802         (0x0400 << 16) | (0xc79c >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0x9100 >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0x3c010 >> 2),
807         0x00000000,
808         (0x0e00 << 16) | (0x8c00 >> 2),
809         0x00000000,
810         (0x0e00 << 16) | (0x8c04 >> 2),
811         0x00000000,
812         (0x0e00 << 16) | (0x8c20 >> 2),
813         0x00000000,
814         (0x0e00 << 16) | (0x8c38 >> 2),
815         0x00000000,
816         (0x0e00 << 16) | (0x8c3c >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0xae00 >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0x9604 >> 2),
821         0x00000000,
822         (0x0e00 << 16) | (0xac08 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0xac0c >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0xac10 >> 2),
827         0x00000000,
828         (0x0e00 << 16) | (0xac14 >> 2),
829         0x00000000,
830         (0x0e00 << 16) | (0xac58 >> 2),
831         0x00000000,
832         (0x0e00 << 16) | (0xac68 >> 2),
833         0x00000000,
834         (0x0e00 << 16) | (0xac6c >> 2),
835         0x00000000,
836         (0x0e00 << 16) | (0xac70 >> 2),
837         0x00000000,
838         (0x0e00 << 16) | (0xac74 >> 2),
839         0x00000000,
840         (0x0e00 << 16) | (0xac78 >> 2),
841         0x00000000,
842         (0x0e00 << 16) | (0xac7c >> 2),
843         0x00000000,
844         (0x0e00 << 16) | (0xac80 >> 2),
845         0x00000000,
846         (0x0e00 << 16) | (0xac84 >> 2),
847         0x00000000,
848         (0x0e00 << 16) | (0xac88 >> 2),
849         0x00000000,
850         (0x0e00 << 16) | (0xac8c >> 2),
851         0x00000000,
852         (0x0e00 << 16) | (0x970c >> 2),
853         0x00000000,
854         (0x0e00 << 16) | (0x9714 >> 2),
855         0x00000000,
856         (0x0e00 << 16) | (0x9718 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0x971c >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x31068 >> 2),
861         0x00000000,
862         (0x4e00 << 16) | (0x31068 >> 2),
863         0x00000000,
864         (0x5e00 << 16) | (0x31068 >> 2),
865         0x00000000,
866         (0x6e00 << 16) | (0x31068 >> 2),
867         0x00000000,
868         (0x7e00 << 16) | (0x31068 >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0xcd10 >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0xcd14 >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0x88b0 >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0x88b4 >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0x88b8 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0x88bc >> 2),
881         0x00000000,
882         (0x0400 << 16) | (0x89c0 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0x88c4 >> 2),
885         0x00000000,
886         (0x0e00 << 16) | (0x88c8 >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0x88d0 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0x88d4 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0x88d8 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0x8980 >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0x30938 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0x3093c >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0x30940 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0x89a0 >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0x30900 >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x30904 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0x89b4 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0x3e1fc >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0x3c210 >> 2),
913         0x00000000,
914         (0x0e00 << 16) | (0x3c214 >> 2),
915         0x00000000,
916         (0x0e00 << 16) | (0x3c218 >> 2),
917         0x00000000,
918         (0x0e00 << 16) | (0x8904 >> 2),
919         0x00000000,
920         0x5,
921         (0x0e00 << 16) | (0x8c28 >> 2),
922         (0x0e00 << 16) | (0x8c2c >> 2),
923         (0x0e00 << 16) | (0x8c30 >> 2),
924         (0x0e00 << 16) | (0x8c34 >> 2),
925         (0x0e00 << 16) | (0x9600 >> 2),
926 };
927
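/*
 * Golden register tables.  These are triplets of { register offset,
 * and-mask, or-mask } consumed by radeon_program_register_sequence(): an
 * and-mask of 0xffffffff means the or-mask is written verbatim, anything
 * else is applied as a read-modify-write of the masked bits.
 */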
928 static const u32 bonaire_golden_spm_registers[] =
929 {
930         0x30800, 0xe0ffffff, 0xe0000000
931 };
932
933 static const u32 bonaire_golden_common_registers[] =
934 {
935         0xc770, 0xffffffff, 0x00000800,
936         0xc774, 0xffffffff, 0x00000800,
937         0xc798, 0xffffffff, 0x00007fbf,
938         0xc79c, 0xffffffff, 0x00007faf
939 };
940
941 static const u32 bonaire_golden_registers[] =
942 {
943         0x3354, 0x00000333, 0x00000333,
944         0x3350, 0x000c0fc0, 0x00040200,
945         0x9a10, 0x00010000, 0x00058208,
946         0x3c000, 0xffff1fff, 0x00140000,
947         0x3c200, 0xfdfc0fff, 0x00000100,
948         0x3c234, 0x40000000, 0x40000200,
949         0x9830, 0xffffffff, 0x00000000,
950         0x9834, 0xf00fffff, 0x00000400,
951         0x9838, 0x0002021c, 0x00020200,
952         0xc78, 0x00000080, 0x00000000,
953         0x5bb0, 0x000000f0, 0x00000070,
954         0x5bc0, 0xf0311fff, 0x80300000,
955         0x98f8, 0x73773777, 0x12010001,
956         0x350c, 0x00810000, 0x408af000,
957         0x7030, 0x31000111, 0x00000011,
958         0x2f48, 0x73773777, 0x12010001,
959         0x220c, 0x00007fb6, 0x0021a1b1,
960         0x2210, 0x00007fb6, 0x002021b1,
961         0x2180, 0x00007fb6, 0x00002191,
962         0x2218, 0x00007fb6, 0x002121b1,
963         0x221c, 0x00007fb6, 0x002021b1,
964         0x21dc, 0x00007fb6, 0x00002191,
965         0x21e0, 0x00007fb6, 0x00002191,
966         0x3628, 0x0000003f, 0x0000000a,
967         0x362c, 0x0000003f, 0x0000000a,
968         0x2ae4, 0x00073ffe, 0x000022a2,
969         0x240c, 0x000007ff, 0x00000000,
970         0x8a14, 0xf000003f, 0x00000007,
971         0x8bf0, 0x00002001, 0x00000001,
972         0x8b24, 0xffffffff, 0x00ffffff,
973         0x30a04, 0x0000ff0f, 0x00000000,
974         0x28a4c, 0x07ffffff, 0x06000000,
975         0x4d8, 0x00000fff, 0x00000100,
976         0x3e78, 0x00000001, 0x00000002,
977         0x9100, 0x03000000, 0x0362c688,
978         0x8c00, 0x000000ff, 0x00000001,
979         0xe40, 0x00001fff, 0x00001fff,
980         0x9060, 0x0000007f, 0x00000020,
981         0x9508, 0x00010000, 0x00010000,
982         0xac14, 0x000003ff, 0x000000f3,
983         0xac0c, 0xffffffff, 0x00001032
984 };
985
986 static const u32 bonaire_mgcg_cgcg_init[] =
987 {
988         0xc420, 0xffffffff, 0xfffffffc,
989         0x30800, 0xffffffff, 0xe0000000,
990         0x3c2a0, 0xffffffff, 0x00000100,
991         0x3c208, 0xffffffff, 0x00000100,
992         0x3c2c0, 0xffffffff, 0xc0000100,
993         0x3c2c8, 0xffffffff, 0xc0000100,
994         0x3c2c4, 0xffffffff, 0xc0000100,
995         0x55e4, 0xffffffff, 0x00600100,
996         0x3c280, 0xffffffff, 0x00000100,
997         0x3c214, 0xffffffff, 0x06000100,
998         0x3c220, 0xffffffff, 0x00000100,
999         0x3c218, 0xffffffff, 0x06000100,
1000         0x3c204, 0xffffffff, 0x00000100,
1001         0x3c2e0, 0xffffffff, 0x00000100,
1002         0x3c224, 0xffffffff, 0x00000100,
1003         0x3c200, 0xffffffff, 0x00000100,
1004         0x3c230, 0xffffffff, 0x00000100,
1005         0x3c234, 0xffffffff, 0x00000100,
1006         0x3c250, 0xffffffff, 0x00000100,
1007         0x3c254, 0xffffffff, 0x00000100,
1008         0x3c258, 0xffffffff, 0x00000100,
1009         0x3c25c, 0xffffffff, 0x00000100,
1010         0x3c260, 0xffffffff, 0x00000100,
1011         0x3c27c, 0xffffffff, 0x00000100,
1012         0x3c278, 0xffffffff, 0x00000100,
1013         0x3c210, 0xffffffff, 0x06000100,
1014         0x3c290, 0xffffffff, 0x00000100,
1015         0x3c274, 0xffffffff, 0x00000100,
1016         0x3c2b4, 0xffffffff, 0x00000100,
1017         0x3c2b0, 0xffffffff, 0x00000100,
1018         0x3c270, 0xffffffff, 0x00000100,
1019         0x30800, 0xffffffff, 0xe0000000,
1020         0x3c020, 0xffffffff, 0x00010000,
1021         0x3c024, 0xffffffff, 0x00030002,
1022         0x3c028, 0xffffffff, 0x00040007,
1023         0x3c02c, 0xffffffff, 0x00060005,
1024         0x3c030, 0xffffffff, 0x00090008,
1025         0x3c034, 0xffffffff, 0x00010000,
1026         0x3c038, 0xffffffff, 0x00030002,
1027         0x3c03c, 0xffffffff, 0x00040007,
1028         0x3c040, 0xffffffff, 0x00060005,
1029         0x3c044, 0xffffffff, 0x00090008,
1030         0x3c048, 0xffffffff, 0x00010000,
1031         0x3c04c, 0xffffffff, 0x00030002,
1032         0x3c050, 0xffffffff, 0x00040007,
1033         0x3c054, 0xffffffff, 0x00060005,
1034         0x3c058, 0xffffffff, 0x00090008,
1035         0x3c05c, 0xffffffff, 0x00010000,
1036         0x3c060, 0xffffffff, 0x00030002,
1037         0x3c064, 0xffffffff, 0x00040007,
1038         0x3c068, 0xffffffff, 0x00060005,
1039         0x3c06c, 0xffffffff, 0x00090008,
1040         0x3c070, 0xffffffff, 0x00010000,
1041         0x3c074, 0xffffffff, 0x00030002,
1042         0x3c078, 0xffffffff, 0x00040007,
1043         0x3c07c, 0xffffffff, 0x00060005,
1044         0x3c080, 0xffffffff, 0x00090008,
1045         0x3c084, 0xffffffff, 0x00010000,
1046         0x3c088, 0xffffffff, 0x00030002,
1047         0x3c08c, 0xffffffff, 0x00040007,
1048         0x3c090, 0xffffffff, 0x00060005,
1049         0x3c094, 0xffffffff, 0x00090008,
1050         0x3c098, 0xffffffff, 0x00010000,
1051         0x3c09c, 0xffffffff, 0x00030002,
1052         0x3c0a0, 0xffffffff, 0x00040007,
1053         0x3c0a4, 0xffffffff, 0x00060005,
1054         0x3c0a8, 0xffffffff, 0x00090008,
1055         0x3c000, 0xffffffff, 0x96e00200,
1056         0x8708, 0xffffffff, 0x00900100,
1057         0xc424, 0xffffffff, 0x0020003f,
1058         0x38, 0xffffffff, 0x0140001c,
1059         0x3c, 0x000f0000, 0x000f0000,
1060         0x220, 0xffffffff, 0xC060000C,
1061         0x224, 0xc0000fff, 0x00000100,
1062         0xf90, 0xffffffff, 0x00000100,
1063         0xf98, 0x00000101, 0x00000000,
1064         0x20a8, 0xffffffff, 0x00000104,
1065         0x55e4, 0xff000fff, 0x00000100,
1066         0x30cc, 0xc0000fff, 0x00000104,
1067         0xc1e4, 0x00000001, 0x00000001,
1068         0xd00c, 0xff000ff0, 0x00000100,
1069         0xd80c, 0xff000ff0, 0x00000100
1070 };
1071
1072 static const u32 spectre_golden_spm_registers[] =
1073 {
1074         0x30800, 0xe0ffffff, 0xe0000000
1075 };
1076
1077 static const u32 spectre_golden_common_registers[] =
1078 {
1079         0xc770, 0xffffffff, 0x00000800,
1080         0xc774, 0xffffffff, 0x00000800,
1081         0xc798, 0xffffffff, 0x00007fbf,
1082         0xc79c, 0xffffffff, 0x00007faf
1083 };
1084
1085 static const u32 spectre_golden_registers[] =
1086 {
1087         0x3c000, 0xffff1fff, 0x96940200,
1088         0x3c00c, 0xffff0001, 0xff000000,
1089         0x3c200, 0xfffc0fff, 0x00000100,
1090         0x6ed8, 0x00010101, 0x00010000,
1091         0x9834, 0xf00fffff, 0x00000400,
1092         0x9838, 0xfffffffc, 0x00020200,
1093         0x5bb0, 0x000000f0, 0x00000070,
1094         0x5bc0, 0xf0311fff, 0x80300000,
1095         0x98f8, 0x73773777, 0x12010001,
1096         0x9b7c, 0x00ff0000, 0x00fc0000,
1097         0x2f48, 0x73773777, 0x12010001,
1098         0x8a14, 0xf000003f, 0x00000007,
1099         0x8b24, 0xffffffff, 0x00ffffff,
1100         0x28350, 0x3f3f3fff, 0x00000082,
1101         0x28354, 0x0000003f, 0x00000000,
1102         0x3e78, 0x00000001, 0x00000002,
1103         0x913c, 0xffff03df, 0x00000004,
1104         0xc768, 0x00000008, 0x00000008,
1105         0x8c00, 0x000008ff, 0x00000800,
1106         0x9508, 0x00010000, 0x00010000,
1107         0xac0c, 0xffffffff, 0x54763210,
1108         0x214f8, 0x01ff01ff, 0x00000002,
1109         0x21498, 0x007ff800, 0x00200000,
1110         0x2015c, 0xffffffff, 0x00000f40,
1111         0x30934, 0xffffffff, 0x00000001
1112 };
1113
1114 static const u32 spectre_mgcg_cgcg_init[] =
1115 {
1116         0xc420, 0xffffffff, 0xfffffffc,
1117         0x30800, 0xffffffff, 0xe0000000,
1118         0x3c2a0, 0xffffffff, 0x00000100,
1119         0x3c208, 0xffffffff, 0x00000100,
1120         0x3c2c0, 0xffffffff, 0x00000100,
1121         0x3c2c8, 0xffffffff, 0x00000100,
1122         0x3c2c4, 0xffffffff, 0x00000100,
1123         0x55e4, 0xffffffff, 0x00600100,
1124         0x3c280, 0xffffffff, 0x00000100,
1125         0x3c214, 0xffffffff, 0x06000100,
1126         0x3c220, 0xffffffff, 0x00000100,
1127         0x3c218, 0xffffffff, 0x06000100,
1128         0x3c204, 0xffffffff, 0x00000100,
1129         0x3c2e0, 0xffffffff, 0x00000100,
1130         0x3c224, 0xffffffff, 0x00000100,
1131         0x3c200, 0xffffffff, 0x00000100,
1132         0x3c230, 0xffffffff, 0x00000100,
1133         0x3c234, 0xffffffff, 0x00000100,
1134         0x3c250, 0xffffffff, 0x00000100,
1135         0x3c254, 0xffffffff, 0x00000100,
1136         0x3c258, 0xffffffff, 0x00000100,
1137         0x3c25c, 0xffffffff, 0x00000100,
1138         0x3c260, 0xffffffff, 0x00000100,
1139         0x3c27c, 0xffffffff, 0x00000100,
1140         0x3c278, 0xffffffff, 0x00000100,
1141         0x3c210, 0xffffffff, 0x06000100,
1142         0x3c290, 0xffffffff, 0x00000100,
1143         0x3c274, 0xffffffff, 0x00000100,
1144         0x3c2b4, 0xffffffff, 0x00000100,
1145         0x3c2b0, 0xffffffff, 0x00000100,
1146         0x3c270, 0xffffffff, 0x00000100,
1147         0x30800, 0xffffffff, 0xe0000000,
1148         0x3c020, 0xffffffff, 0x00010000,
1149         0x3c024, 0xffffffff, 0x00030002,
1150         0x3c028, 0xffffffff, 0x00040007,
1151         0x3c02c, 0xffffffff, 0x00060005,
1152         0x3c030, 0xffffffff, 0x00090008,
1153         0x3c034, 0xffffffff, 0x00010000,
1154         0x3c038, 0xffffffff, 0x00030002,
1155         0x3c03c, 0xffffffff, 0x00040007,
1156         0x3c040, 0xffffffff, 0x00060005,
1157         0x3c044, 0xffffffff, 0x00090008,
1158         0x3c048, 0xffffffff, 0x00010000,
1159         0x3c04c, 0xffffffff, 0x00030002,
1160         0x3c050, 0xffffffff, 0x00040007,
1161         0x3c054, 0xffffffff, 0x00060005,
1162         0x3c058, 0xffffffff, 0x00090008,
1163         0x3c05c, 0xffffffff, 0x00010000,
1164         0x3c060, 0xffffffff, 0x00030002,
1165         0x3c064, 0xffffffff, 0x00040007,
1166         0x3c068, 0xffffffff, 0x00060005,
1167         0x3c06c, 0xffffffff, 0x00090008,
1168         0x3c070, 0xffffffff, 0x00010000,
1169         0x3c074, 0xffffffff, 0x00030002,
1170         0x3c078, 0xffffffff, 0x00040007,
1171         0x3c07c, 0xffffffff, 0x00060005,
1172         0x3c080, 0xffffffff, 0x00090008,
1173         0x3c084, 0xffffffff, 0x00010000,
1174         0x3c088, 0xffffffff, 0x00030002,
1175         0x3c08c, 0xffffffff, 0x00040007,
1176         0x3c090, 0xffffffff, 0x00060005,
1177         0x3c094, 0xffffffff, 0x00090008,
1178         0x3c098, 0xffffffff, 0x00010000,
1179         0x3c09c, 0xffffffff, 0x00030002,
1180         0x3c0a0, 0xffffffff, 0x00040007,
1181         0x3c0a4, 0xffffffff, 0x00060005,
1182         0x3c0a8, 0xffffffff, 0x00090008,
1183         0x3c0ac, 0xffffffff, 0x00010000,
1184         0x3c0b0, 0xffffffff, 0x00030002,
1185         0x3c0b4, 0xffffffff, 0x00040007,
1186         0x3c0b8, 0xffffffff, 0x00060005,
1187         0x3c0bc, 0xffffffff, 0x00090008,
1188         0x3c000, 0xffffffff, 0x96e00200,
1189         0x8708, 0xffffffff, 0x00900100,
1190         0xc424, 0xffffffff, 0x0020003f,
1191         0x38, 0xffffffff, 0x0140001c,
1192         0x3c, 0x000f0000, 0x000f0000,
1193         0x220, 0xffffffff, 0xC060000C,
1194         0x224, 0xc0000fff, 0x00000100,
1195         0xf90, 0xffffffff, 0x00000100,
1196         0xf98, 0x00000101, 0x00000000,
1197         0x20a8, 0xffffffff, 0x00000104,
1198         0x55e4, 0xff000fff, 0x00000100,
1199         0x30cc, 0xc0000fff, 0x00000104,
1200         0xc1e4, 0x00000001, 0x00000001,
1201         0xd00c, 0xff000ff0, 0x00000100,
1202         0xd80c, 0xff000ff0, 0x00000100
1203 };
1204
1205 static const u32 kalindi_golden_spm_registers[] =
1206 {
1207         0x30800, 0xe0ffffff, 0xe0000000
1208 };
1209
1210 static const u32 kalindi_golden_common_registers[] =
1211 {
1212         0xc770, 0xffffffff, 0x00000800,
1213         0xc774, 0xffffffff, 0x00000800,
1214         0xc798, 0xffffffff, 0x00007fbf,
1215         0xc79c, 0xffffffff, 0x00007faf
1216 };
1217
1218 static const u32 kalindi_golden_registers[] =
1219 {
1220         0x3c000, 0xffffdfff, 0x6e944040,
1221         0x55e4, 0xff607fff, 0xfc000100,
1222         0x3c220, 0xff000fff, 0x00000100,
1223         0x3c224, 0xff000fff, 0x00000100,
1224         0x3c200, 0xfffc0fff, 0x00000100,
1225         0x6ed8, 0x00010101, 0x00010000,
1226         0x9830, 0xffffffff, 0x00000000,
1227         0x9834, 0xf00fffff, 0x00000400,
1228         0x5bb0, 0x000000f0, 0x00000070,
1229         0x5bc0, 0xf0311fff, 0x80300000,
1230         0x98f8, 0x73773777, 0x12010001,
1231         0x98fc, 0xffffffff, 0x00000010,
1232         0x9b7c, 0x00ff0000, 0x00fc0000,
1233         0x8030, 0x00001f0f, 0x0000100a,
1234         0x2f48, 0x73773777, 0x12010001,
1235         0x2408, 0x000fffff, 0x000c007f,
1236         0x8a14, 0xf000003f, 0x00000007,
1237         0x8b24, 0x3fff3fff, 0x00ffcfff,
1238         0x30a04, 0x0000ff0f, 0x00000000,
1239         0x28a4c, 0x07ffffff, 0x06000000,
1240         0x4d8, 0x00000fff, 0x00000100,
1241         0x3e78, 0x00000001, 0x00000002,
1242         0xc768, 0x00000008, 0x00000008,
1243         0x8c00, 0x000000ff, 0x00000003,
1244         0x214f8, 0x01ff01ff, 0x00000002,
1245         0x21498, 0x007ff800, 0x00200000,
1246         0x2015c, 0xffffffff, 0x00000f40,
1247         0x88c4, 0x001f3ae3, 0x00000082,
1248         0x88d4, 0x0000001f, 0x00000010,
1249         0x30934, 0xffffffff, 0x00000000
1250 };
1251
1252 static const u32 kalindi_mgcg_cgcg_init[] =
1253 {
1254         0xc420, 0xffffffff, 0xfffffffc,
1255         0x30800, 0xffffffff, 0xe0000000,
1256         0x3c2a0, 0xffffffff, 0x00000100,
1257         0x3c208, 0xffffffff, 0x00000100,
1258         0x3c2c0, 0xffffffff, 0x00000100,
1259         0x3c2c8, 0xffffffff, 0x00000100,
1260         0x3c2c4, 0xffffffff, 0x00000100,
1261         0x55e4, 0xffffffff, 0x00600100,
1262         0x3c280, 0xffffffff, 0x00000100,
1263         0x3c214, 0xffffffff, 0x06000100,
1264         0x3c220, 0xffffffff, 0x00000100,
1265         0x3c218, 0xffffffff, 0x06000100,
1266         0x3c204, 0xffffffff, 0x00000100,
1267         0x3c2e0, 0xffffffff, 0x00000100,
1268         0x3c224, 0xffffffff, 0x00000100,
1269         0x3c200, 0xffffffff, 0x00000100,
1270         0x3c230, 0xffffffff, 0x00000100,
1271         0x3c234, 0xffffffff, 0x00000100,
1272         0x3c250, 0xffffffff, 0x00000100,
1273         0x3c254, 0xffffffff, 0x00000100,
1274         0x3c258, 0xffffffff, 0x00000100,
1275         0x3c25c, 0xffffffff, 0x00000100,
1276         0x3c260, 0xffffffff, 0x00000100,
1277         0x3c27c, 0xffffffff, 0x00000100,
1278         0x3c278, 0xffffffff, 0x00000100,
1279         0x3c210, 0xffffffff, 0x06000100,
1280         0x3c290, 0xffffffff, 0x00000100,
1281         0x3c274, 0xffffffff, 0x00000100,
1282         0x3c2b4, 0xffffffff, 0x00000100,
1283         0x3c2b0, 0xffffffff, 0x00000100,
1284         0x3c270, 0xffffffff, 0x00000100,
1285         0x30800, 0xffffffff, 0xe0000000,
1286         0x3c020, 0xffffffff, 0x00010000,
1287         0x3c024, 0xffffffff, 0x00030002,
1288         0x3c028, 0xffffffff, 0x00040007,
1289         0x3c02c, 0xffffffff, 0x00060005,
1290         0x3c030, 0xffffffff, 0x00090008,
1291         0x3c034, 0xffffffff, 0x00010000,
1292         0x3c038, 0xffffffff, 0x00030002,
1293         0x3c03c, 0xffffffff, 0x00040007,
1294         0x3c040, 0xffffffff, 0x00060005,
1295         0x3c044, 0xffffffff, 0x00090008,
1296         0x3c000, 0xffffffff, 0x96e00200,
1297         0x8708, 0xffffffff, 0x00900100,
1298         0xc424, 0xffffffff, 0x0020003f,
1299         0x38, 0xffffffff, 0x0140001c,
1300         0x3c, 0x000f0000, 0x000f0000,
1301         0x220, 0xffffffff, 0xC060000C,
1302         0x224, 0xc0000fff, 0x00000100,
1303         0x20a8, 0xffffffff, 0x00000104,
1304         0x55e4, 0xff000fff, 0x00000100,
1305         0x30cc, 0xc0000fff, 0x00000104,
1306         0xc1e4, 0x00000001, 0x00000001,
1307         0xd00c, 0xff000ff0, 0x00000100,
1308         0xd80c, 0xff000ff0, 0x00000100
1309 };
1310
1311 static const u32 hawaii_golden_spm_registers[] =
1312 {
1313         0x30800, 0xe0ffffff, 0xe0000000
1314 };
1315
1316 static const u32 hawaii_golden_common_registers[] =
1317 {
1318         0x30800, 0xffffffff, 0xe0000000,
1319         0x28350, 0xffffffff, 0x3a00161a,
1320         0x28354, 0xffffffff, 0x0000002e,
1321         0x9a10, 0xffffffff, 0x00018208,
1322         0x98f8, 0xffffffff, 0x12011003
1323 };
1324
1325 static const u32 hawaii_golden_registers[] =
1326 {
1327         0x3354, 0x00000333, 0x00000333,
1328         0x9a10, 0x00010000, 0x00058208,
1329         0x9830, 0xffffffff, 0x00000000,
1330         0x9834, 0xf00fffff, 0x00000400,
1331         0x9838, 0x0002021c, 0x00020200,
1332         0xc78, 0x00000080, 0x00000000,
1333         0x5bb0, 0x000000f0, 0x00000070,
1334         0x5bc0, 0xf0311fff, 0x80300000,
1335         0x350c, 0x00810000, 0x408af000,
1336         0x7030, 0x31000111, 0x00000011,
1337         0x2f48, 0x73773777, 0x12010001,
1338         0x2120, 0x0000007f, 0x0000001b,
1339         0x21dc, 0x00007fb6, 0x00002191,
1340         0x3628, 0x0000003f, 0x0000000a,
1341         0x362c, 0x0000003f, 0x0000000a,
1342         0x2ae4, 0x00073ffe, 0x000022a2,
1343         0x240c, 0x000007ff, 0x00000000,
1344         0x8bf0, 0x00002001, 0x00000001,
1345         0x8b24, 0xffffffff, 0x00ffffff,
1346         0x30a04, 0x0000ff0f, 0x00000000,
1347         0x28a4c, 0x07ffffff, 0x06000000,
1348         0x3e78, 0x00000001, 0x00000002,
1349         0xc768, 0x00000008, 0x00000008,
1350         0xc770, 0x00000f00, 0x00000800,
1351         0xc774, 0x00000f00, 0x00000800,
1352         0xc798, 0x00ffffff, 0x00ff7fbf,
1353         0xc79c, 0x00ffffff, 0x00ff7faf,
1354         0x8c00, 0x000000ff, 0x00000800,
1355         0xe40, 0x00001fff, 0x00001fff,
1356         0x9060, 0x0000007f, 0x00000020,
1357         0x9508, 0x00010000, 0x00010000,
1358         0xae00, 0x00100000, 0x000ff07c,
1359         0xac14, 0x000003ff, 0x0000000f,
1360         0xac10, 0xffffffff, 0x7564fdec,
1361         0xac0c, 0xffffffff, 0x3120b9a8,
1362         0xac08, 0x20000000, 0x0f9c0000
1363 };
1364
1365 static const u32 hawaii_mgcg_cgcg_init[] =
1366 {
1367         0xc420, 0xffffffff, 0xfffffffd,
1368         0x30800, 0xffffffff, 0xe0000000,
1369         0x3c2a0, 0xffffffff, 0x00000100,
1370         0x3c208, 0xffffffff, 0x00000100,
1371         0x3c2c0, 0xffffffff, 0x00000100,
1372         0x3c2c8, 0xffffffff, 0x00000100,
1373         0x3c2c4, 0xffffffff, 0x00000100,
1374         0x55e4, 0xffffffff, 0x00200100,
1375         0x3c280, 0xffffffff, 0x00000100,
1376         0x3c214, 0xffffffff, 0x06000100,
1377         0x3c220, 0xffffffff, 0x00000100,
1378         0x3c218, 0xffffffff, 0x06000100,
1379         0x3c204, 0xffffffff, 0x00000100,
1380         0x3c2e0, 0xffffffff, 0x00000100,
1381         0x3c224, 0xffffffff, 0x00000100,
1382         0x3c200, 0xffffffff, 0x00000100,
1383         0x3c230, 0xffffffff, 0x00000100,
1384         0x3c234, 0xffffffff, 0x00000100,
1385         0x3c250, 0xffffffff, 0x00000100,
1386         0x3c254, 0xffffffff, 0x00000100,
1387         0x3c258, 0xffffffff, 0x00000100,
1388         0x3c25c, 0xffffffff, 0x00000100,
1389         0x3c260, 0xffffffff, 0x00000100,
1390         0x3c27c, 0xffffffff, 0x00000100,
1391         0x3c278, 0xffffffff, 0x00000100,
1392         0x3c210, 0xffffffff, 0x06000100,
1393         0x3c290, 0xffffffff, 0x00000100,
1394         0x3c274, 0xffffffff, 0x00000100,
1395         0x3c2b4, 0xffffffff, 0x00000100,
1396         0x3c2b0, 0xffffffff, 0x00000100,
1397         0x3c270, 0xffffffff, 0x00000100,
1398         0x30800, 0xffffffff, 0xe0000000,
1399         0x3c020, 0xffffffff, 0x00010000,
1400         0x3c024, 0xffffffff, 0x00030002,
1401         0x3c028, 0xffffffff, 0x00040007,
1402         0x3c02c, 0xffffffff, 0x00060005,
1403         0x3c030, 0xffffffff, 0x00090008,
1404         0x3c034, 0xffffffff, 0x00010000,
1405         0x3c038, 0xffffffff, 0x00030002,
1406         0x3c03c, 0xffffffff, 0x00040007,
1407         0x3c040, 0xffffffff, 0x00060005,
1408         0x3c044, 0xffffffff, 0x00090008,
1409         0x3c048, 0xffffffff, 0x00010000,
1410         0x3c04c, 0xffffffff, 0x00030002,
1411         0x3c050, 0xffffffff, 0x00040007,
1412         0x3c054, 0xffffffff, 0x00060005,
1413         0x3c058, 0xffffffff, 0x00090008,
1414         0x3c05c, 0xffffffff, 0x00010000,
1415         0x3c060, 0xffffffff, 0x00030002,
1416         0x3c064, 0xffffffff, 0x00040007,
1417         0x3c068, 0xffffffff, 0x00060005,
1418         0x3c06c, 0xffffffff, 0x00090008,
1419         0x3c070, 0xffffffff, 0x00010000,
1420         0x3c074, 0xffffffff, 0x00030002,
1421         0x3c078, 0xffffffff, 0x00040007,
1422         0x3c07c, 0xffffffff, 0x00060005,
1423         0x3c080, 0xffffffff, 0x00090008,
1424         0x3c084, 0xffffffff, 0x00010000,
1425         0x3c088, 0xffffffff, 0x00030002,
1426         0x3c08c, 0xffffffff, 0x00040007,
1427         0x3c090, 0xffffffff, 0x00060005,
1428         0x3c094, 0xffffffff, 0x00090008,
1429         0x3c098, 0xffffffff, 0x00010000,
1430         0x3c09c, 0xffffffff, 0x00030002,
1431         0x3c0a0, 0xffffffff, 0x00040007,
1432         0x3c0a4, 0xffffffff, 0x00060005,
1433         0x3c0a8, 0xffffffff, 0x00090008,
1434         0x3c0ac, 0xffffffff, 0x00010000,
1435         0x3c0b0, 0xffffffff, 0x00030002,
1436         0x3c0b4, 0xffffffff, 0x00040007,
1437         0x3c0b8, 0xffffffff, 0x00060005,
1438         0x3c0bc, 0xffffffff, 0x00090008,
1439         0x3c0c0, 0xffffffff, 0x00010000,
1440         0x3c0c4, 0xffffffff, 0x00030002,
1441         0x3c0c8, 0xffffffff, 0x00040007,
1442         0x3c0cc, 0xffffffff, 0x00060005,
1443         0x3c0d0, 0xffffffff, 0x00090008,
1444         0x3c0d4, 0xffffffff, 0x00010000,
1445         0x3c0d8, 0xffffffff, 0x00030002,
1446         0x3c0dc, 0xffffffff, 0x00040007,
1447         0x3c0e0, 0xffffffff, 0x00060005,
1448         0x3c0e4, 0xffffffff, 0x00090008,
1449         0x3c0e8, 0xffffffff, 0x00010000,
1450         0x3c0ec, 0xffffffff, 0x00030002,
1451         0x3c0f0, 0xffffffff, 0x00040007,
1452         0x3c0f4, 0xffffffff, 0x00060005,
1453         0x3c0f8, 0xffffffff, 0x00090008,
1454         0xc318, 0xffffffff, 0x00020200,
1455         0x3350, 0xffffffff, 0x00000200,
1456         0x15c0, 0xffffffff, 0x00000400,
1457         0x55e8, 0xffffffff, 0x00000000,
1458         0x2f50, 0xffffffff, 0x00000902,
1459         0x3c000, 0xffffffff, 0x96940200,
1460         0x8708, 0xffffffff, 0x00900100,
1461         0xc424, 0xffffffff, 0x0020003f,
1462         0x38, 0xffffffff, 0x0140001c,
1463         0x3c, 0x000f0000, 0x000f0000,
1464         0x220, 0xffffffff, 0xc060000c,
1465         0x224, 0xc0000fff, 0x00000100,
1466         0xf90, 0xffffffff, 0x00000100,
1467         0xf98, 0x00000101, 0x00000000,
1468         0x20a8, 0xffffffff, 0x00000104,
1469         0x55e4, 0xff000fff, 0x00000100,
1470         0x30cc, 0xc0000fff, 0x00000104,
1471         0xc1e4, 0x00000001, 0x00000001,
1472         0xd00c, 0xff000ff0, 0x00000100,
1473         0xd80c, 0xff000ff0, 0x00000100
1474 };
1475
1476 static void cik_init_golden_registers(struct radeon_device *rdev)
1477 {
1478         switch (rdev->family) {
1479         case CHIP_BONAIRE:
1480                 radeon_program_register_sequence(rdev,
1481                                                  bonaire_mgcg_cgcg_init,
1482                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1483                 radeon_program_register_sequence(rdev,
1484                                                  bonaire_golden_registers,
1485                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1486                 radeon_program_register_sequence(rdev,
1487                                                  bonaire_golden_common_registers,
1488                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1489                 radeon_program_register_sequence(rdev,
1490                                                  bonaire_golden_spm_registers,
1491                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1492                 break;
1493         case CHIP_KABINI:
1494                 radeon_program_register_sequence(rdev,
1495                                                  kalindi_mgcg_cgcg_init,
1496                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1497                 radeon_program_register_sequence(rdev,
1498                                                  kalindi_golden_registers,
1499                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1500                 radeon_program_register_sequence(rdev,
1501                                                  kalindi_golden_common_registers,
1502                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1503                 radeon_program_register_sequence(rdev,
1504                                                  kalindi_golden_spm_registers,
1505                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1506                 break;
1507         case CHIP_KAVERI:
1508                 radeon_program_register_sequence(rdev,
1509                                                  spectre_mgcg_cgcg_init,
1510                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1511                 radeon_program_register_sequence(rdev,
1512                                                  spectre_golden_registers,
1513                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1514                 radeon_program_register_sequence(rdev,
1515                                                  spectre_golden_common_registers,
1516                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1517                 radeon_program_register_sequence(rdev,
1518                                                  spectre_golden_spm_registers,
1519                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1520                 break;
1521         case CHIP_HAWAII:
1522                 radeon_program_register_sequence(rdev,
1523                                                  hawaii_mgcg_cgcg_init,
1524                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1525                 radeon_program_register_sequence(rdev,
1526                                                  hawaii_golden_registers,
1527                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1528                 radeon_program_register_sequence(rdev,
1529                                                  hawaii_golden_common_registers,
1530                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1531                 radeon_program_register_sequence(rdev,
1532                                                  hawaii_golden_spm_registers,
1533                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1534                 break;
1535         default:
1536                 break;
1537         }
1538 }
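
/*
 * A minimal sketch of the register-sequence format consumed above, assuming
 * radeon_program_register_sequence() (in radeon_device.c) treats each table
 * as packed {offset, and_mask, or_value} triplets and applies a
 * read-modify-write, with a full 0xffffffff mask degenerating into a plain
 * write.  example_program_register_sequence() is illustrative only.
 */
static void example_program_register_sequence(struct radeon_device *rdev,
					      const u32 *registers,
					      const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	u32 i;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			/* full mask: just write the new value */
			tmp = or_mask;
		} else {
			/* partial mask: preserve the untouched bits */
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}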
1539
1540 /**
1541  * cik_get_xclk - get the xclk
1542  *
1543  * @rdev: radeon_device pointer
1544  *
1545  * Returns the reference clock used by the gfx engine
1546  * (CIK).
1547  */
1548 u32 cik_get_xclk(struct radeon_device *rdev)
1549 {
1550         u32 reference_clock = rdev->clock.spll.reference_freq;
1551
1552         if (rdev->flags & RADEON_IS_IGP) {
1553                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1554                         return reference_clock / 2;
1555         } else {
1556                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1557                         return reference_clock / 4;
1558         }
1559         return reference_clock;
1560 }
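
/*
 * A minimal usage sketch: converting a tick count measured against the
 * reference clock into microseconds, assuming (as in other clock handling
 * in the driver) that the value returned by cik_get_xclk() is expressed in
 * 10 kHz units.  example_xclk_ticks_to_usecs() is illustrative only.
 */
static u32 example_xclk_ticks_to_usecs(struct radeon_device *rdev, u32 ticks)
{
	u32 xclk = cik_get_xclk(rdev);	/* assumed to be in 10 kHz units */

	if (xclk == 0)
		return 0;

	/* ticks per microsecond = xclk / 100, so usecs = ticks * 100 / xclk */
	return (ticks * 100) / xclk;
}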
1561
1562 /**
1563  * cik_mm_rdoorbell - read a doorbell dword
1564  *
1565  * @rdev: radeon_device pointer
1566  * @index: doorbell index
1567  *
1568  * Returns the value in the doorbell aperture at the
1569  * requested doorbell index (CIK).
1570  */
1571 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1572 {
1573         if (index < rdev->doorbell.num_doorbells) {
1574                 return readl(rdev->doorbell.ptr + index);
1575         } else {
1576                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1577                 return 0;
1578         }
1579 }
1580
1581 /**
1582  * cik_mm_wdoorbell - write a doorbell dword
1583  *
1584  * @rdev: radeon_device pointer
1585  * @index: doorbell index
1586  * @v: value to write
1587  *
1588  * Writes @v to the doorbell aperture at the
1589  * requested doorbell index (CIK).
1590  */
1591 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1592 {
1593         if (index < rdev->doorbell.num_doorbells) {
1594                 writel(v, rdev->doorbell.ptr + index);
1595         } else {
1596                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1597         }
1598 }
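
/*
 * A minimal usage sketch for the doorbell helpers above, assuming the ring
 * was assigned a doorbell dword (an index below num_doorbells, e.g. via
 * radeon_doorbell_get()): writing the ring's write pointer to that dword
 * tells the engine to fetch the newly queued packets.  The function and
 * parameter names here are illustrative.
 */
static void example_ring_commit(struct radeon_device *rdev,
				u32 doorbell_index, u32 wptr)
{
	/* publish the new write pointer through the doorbell aperture */
	cik_mm_wdoorbell(rdev, doorbell_index, wptr);
}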
1599
1600 #define BONAIRE_IO_MC_REGS_SIZE 36
1601
1602 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1603 {
1604         {0x00000070, 0x04400000},
1605         {0x00000071, 0x80c01803},
1606         {0x00000072, 0x00004004},
1607         {0x00000073, 0x00000100},
1608         {0x00000074, 0x00ff0000},
1609         {0x00000075, 0x34000000},
1610         {0x00000076, 0x08000014},
1611         {0x00000077, 0x00cc08ec},
1612         {0x00000078, 0x00000400},
1613         {0x00000079, 0x00000000},
1614         {0x0000007a, 0x04090000},
1615         {0x0000007c, 0x00000000},
1616         {0x0000007e, 0x4408a8e8},
1617         {0x0000007f, 0x00000304},
1618         {0x00000080, 0x00000000},
1619         {0x00000082, 0x00000001},
1620         {0x00000083, 0x00000002},
1621         {0x00000084, 0xf3e4f400},
1622         {0x00000085, 0x052024e3},
1623         {0x00000087, 0x00000000},
1624         {0x00000088, 0x01000000},
1625         {0x0000008a, 0x1c0a0000},
1626         {0x0000008b, 0xff010000},
1627         {0x0000008d, 0xffffefff},
1628         {0x0000008e, 0xfff3efff},
1629         {0x0000008f, 0xfff3efbf},
1630         {0x00000092, 0xf7ffffff},
1631         {0x00000093, 0xffffff7f},
1632         {0x00000095, 0x00101101},
1633         {0x00000096, 0x00000fff},
1634         {0x00000097, 0x00116fff},
1635         {0x00000098, 0x60010000},
1636         {0x00000099, 0x10010000},
1637         {0x0000009a, 0x00006000},
1638         {0x0000009b, 0x00001000},
1639         {0x0000009f, 0x00b48000}
1640 };
1641
1642 #define HAWAII_IO_MC_REGS_SIZE 22
1643
1644 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1645 {
1646         {0x0000007d, 0x40000000},
1647         {0x0000007e, 0x40180304},
1648         {0x0000007f, 0x0000ff00},
1649         {0x00000081, 0x00000000},
1650         {0x00000083, 0x00000800},
1651         {0x00000086, 0x00000000},
1652         {0x00000087, 0x00000100},
1653         {0x00000088, 0x00020100},
1654         {0x00000089, 0x00000000},
1655         {0x0000008b, 0x00040000},
1656         {0x0000008c, 0x00000100},
1657         {0x0000008e, 0xff010000},
1658         {0x00000090, 0xffffefff},
1659         {0x00000091, 0xfff3efff},
1660         {0x00000092, 0xfff3efbf},
1661         {0x00000093, 0xf7ffffff},
1662         {0x00000094, 0xffffff7f},
1663         {0x00000095, 0x00000fff},
1664         {0x00000096, 0x00116fff},
1665         {0x00000097, 0x60010000},
1666         {0x00000098, 0x10010000},
1667         {0x0000009f, 0x00c79000}
1668 };
1669
1670
1671 /**
1672  * cik_srbm_select - select specific register instances
1673  *
1674  * @rdev: radeon_device pointer
1675  * @me: selected ME (micro engine)
1676  * @pipe: pipe
1677  * @queue: queue
1678  * @vmid: VMID
1679  *
1680  * Switches the currently active register instances.  Some
1681  * registers are instanced per VMID, others are instanced per
1682  * me/pipe/queue combination.
1683  */
1684 static void cik_srbm_select(struct radeon_device *rdev,
1685                             u32 me, u32 pipe, u32 queue, u32 vmid)
1686 {
1687         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1688                              MEID(me & 0x3) |
1689                              VMID(vmid & 0xf) |
1690                              QUEUEID(queue & 0x7));
1691         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1692 }
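
/*
 * A minimal sketch of the usual access pattern for instanced registers,
 * assuming callers serialize on rdev->srbm_mutex as the rest of this file
 * does: select the me/pipe/queue/vmid instance, program the per-instance
 * registers, then switch back to instance 0.  The register block named in
 * the comment is a stand-in; example_program_hqd() is illustrative only.
 */
static void example_program_hqd(struct radeon_device *rdev,
				u32 me, u32 pipe, u32 queue, u32 vmid)
{
	mutex_lock(&rdev->srbm_mutex);
	cik_srbm_select(rdev, me, pipe, queue, vmid);

	/* ... program per-queue registers (e.g. the CP_HQD_* block) here ... */

	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}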
1693
1694 /* ucode loading */
1695 /**
1696  * ci_mc_load_microcode - load MC ucode into the hw
1697  *
1698  * @rdev: radeon_device pointer
1699  *
1700  * Load the GDDR MC ucode into the hw (CIK).
1701  * Returns 0 on success, error on failure.
1702  */
1703 int ci_mc_load_microcode(struct radeon_device *rdev)
1704 {
1705         const __be32 *fw_data;
1706         u32 running, blackout = 0;
1707         u32 *io_mc_regs;
1708         int i, regs_size, ucode_size;
1709
1710         if (!rdev->mc_fw)
1711                 return -EINVAL;
1712
1713         ucode_size = rdev->mc_fw->size / 4;
1714
1715         switch (rdev->family) {
1716         case CHIP_BONAIRE:
1717                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1718                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1719                 break;
1720         case CHIP_HAWAII:
1721                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1722                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1723                 break;
1724         default:
1725                 return -EINVAL;
1726         }
1727
1728         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1729
1730         if (running == 0) {
1731                 if (running) {
1732                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1733                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1734                 }
1735
1736                 /* reset the engine and set to writable */
1737                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1738                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1739
1740                 /* load mc io regs */
1741                 for (i = 0; i < regs_size; i++) {
1742                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1743                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1744                 }
1745                 /* load the MC ucode */
1746                 fw_data = (const __be32 *)rdev->mc_fw->data;
1747                 for (i = 0; i < ucode_size; i++)
1748                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1749
1750                 /* put the engine back into the active state */
1751                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1752                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1753                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1754
1755                 /* wait for training to complete */
1756                 for (i = 0; i < rdev->usec_timeout; i++) {
1757                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1758                                 break;
1759                         udelay(1);
1760                 }
1761                 for (i = 0; i < rdev->usec_timeout; i++) {
1762                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1763                                 break;
1764                         udelay(1);
1765                 }
1766
1767                 if (running)
1768                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1769         }
1770
1771         return 0;
1772 }
1773
1774 /**
1775  * cik_init_microcode - load ucode images from disk
1776  *
1777  * @rdev: radeon_device pointer
1778  *
1779  * Use the firmware interface to load the ucode images into
1780  * the driver; they are not loaded into the hw at this point.
1781  * Returns 0 on success, error on failure.
1782  */
1783 static int cik_init_microcode(struct radeon_device *rdev)
1784 {
1785         const char *chip_name;
1786         size_t pfp_req_size, me_req_size, ce_req_size,
1787                 mec_req_size, rlc_req_size, mc_req_size = 0,
1788                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1789         char fw_name[30];
1790         int err;
1791
1792         DRM_DEBUG("\n");
1793
1794         switch (rdev->family) {
1795         case CHIP_BONAIRE:
1796                 chip_name = "BONAIRE";
1797                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1798                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1799                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1800                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1801                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1802                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1803                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1804                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1805                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1806                 break;
1807         case CHIP_HAWAII:
1808                 chip_name = "HAWAII";
1809                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1810                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1811                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1812                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1813                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1814                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1815                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1816                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1817                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1818                 break;
1819         case CHIP_KAVERI:
1820                 chip_name = "KAVERI";
1821                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1822                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1823                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1824                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1825                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1826                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1827                 break;
1828         case CHIP_KABINI:
1829                 chip_name = "KABINI";
1830                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1831                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1832                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1833                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1834                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1835                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1836                 break;
1837         default: BUG();
1838         }
1839
1840         DRM_INFO("Loading %s Microcode\n", chip_name);
1841
1842         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1843         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1844         if (err)
1845                 goto out;
1846         if (rdev->pfp_fw->size != pfp_req_size) {
1847                 printk(KERN_ERR
1848                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1849                        rdev->pfp_fw->size, fw_name);
1850                 err = -EINVAL;
1851                 goto out;
1852         }
1853
1854         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1855         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1856         if (err)
1857                 goto out;
1858         if (rdev->me_fw->size != me_req_size) {
1859                 printk(KERN_ERR
1860                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1861                        rdev->me_fw->size, fw_name);
1862                 err = -EINVAL;
1863         }
1864
1865         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1866         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1867         if (err)
1868                 goto out;
1869         if (rdev->ce_fw->size != ce_req_size) {
1870                 printk(KERN_ERR
1871                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1872                        rdev->ce_fw->size, fw_name);
1873                 err = -EINVAL;
1874         }
1875
1876         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1877         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1878         if (err)
1879                 goto out;
1880         if (rdev->mec_fw->size != mec_req_size) {
1881                 printk(KERN_ERR
1882                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1883                        rdev->mec_fw->size, fw_name);
1884                 err = -EINVAL;
1885         }
1886
1887         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1888         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1889         if (err)
1890                 goto out;
1891         if (rdev->rlc_fw->size != rlc_req_size) {
1892                 printk(KERN_ERR
1893                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1894                        rdev->rlc_fw->size, fw_name);
1895                 err = -EINVAL;
1896         }
1897
1898         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1899         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1900         if (err)
1901                 goto out;
1902         if (rdev->sdma_fw->size != sdma_req_size) {
1903                 printk(KERN_ERR
1904                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1905                        rdev->sdma_fw->size, fw_name);
1906                 err = -EINVAL;
1907         }
1908
1909         /* No SMC, MC ucode on APUs */
1910         if (!(rdev->flags & RADEON_IS_IGP)) {
1911                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1912                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1913                 if (err) {
1914                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1915                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1916                         if (err)
1917                                 goto out;
1918                 }
1919                 if ((rdev->mc_fw->size != mc_req_size) &&
1920                     (rdev->mc_fw->size != mc2_req_size)){
1921                         printk(KERN_ERR
1922                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1923                                rdev->mc_fw->size, fw_name);
1924                         err = -EINVAL;
1925                 }
1926                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1927
1928                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1929                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1930                 if (err) {
1931                         printk(KERN_ERR
1932                                "smc: error loading firmware \"%s\"\n",
1933                                fw_name);
1934                         release_firmware(rdev->smc_fw);
1935                         rdev->smc_fw = NULL;
1936                         err = 0;
1937                 } else if (rdev->smc_fw->size != smc_req_size) {
1938                         printk(KERN_ERR
1939                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1940                                rdev->smc_fw->size, fw_name);
1941                         err = -EINVAL;
1942                 }
1943         }
1944
1945 out:
1946         if (err) {
1947                 if (err != -EINVAL)
1948                         printk(KERN_ERR
1949                                "cik_cp: Failed to load firmware \"%s\"\n",
1950                                fw_name);
1951                 release_firmware(rdev->pfp_fw);
1952                 rdev->pfp_fw = NULL;
1953                 release_firmware(rdev->me_fw);
1954                 rdev->me_fw = NULL;
1955                 release_firmware(rdev->ce_fw);
1956                 rdev->ce_fw = NULL;
1957                 release_firmware(rdev->rlc_fw);
1958                 rdev->rlc_fw = NULL;
1959                 release_firmware(rdev->mc_fw);
1960                 rdev->mc_fw = NULL;
1961                 release_firmware(rdev->smc_fw);
1962                 rdev->smc_fw = NULL;
1963         }
1964         return err;
1965 }
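
/*
 * A minimal sketch of the request-and-validate pattern repeated above,
 * factored into one hypothetical helper (example_request_fw() is not part
 * of the driver): fetch "radeon/<chip>_<suffix>.bin" via request_firmware()
 * and reject images whose size does not match the expected ucode size.
 */
static int example_request_fw(struct radeon_device *rdev,
			      const struct firmware **fw,
			      const char *chip_name, const char *suffix,
			      size_t req_size)
{
	char fw_name[30];
	int err;

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin",
		 chip_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (err)
		return err;

	if ((*fw)->size != req_size) {
		printk(KERN_ERR "cik: Bogus length %zu in firmware \"%s\"\n",
		       (*fw)->size, fw_name);
		release_firmware(*fw);
		*fw = NULL;
		return -EINVAL;
	}
	return 0;
}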
1966
1967 /*
1968  * Core functions
1969  */
1970 /**
1971  * cik_tiling_mode_table_init - init the hw tiling table
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Starting with SI, the tiling setup is done globally in a
1976  * set of 32 tiling modes.  Rather than selecting each set of
1977  * parameters per surface as on older asics, we just select
1978  * which index in the tiling table we want to use, and the
1979  * surface uses those parameters (CIK).
1980  */
1981 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1982 {
1983         const u32 num_tile_mode_states = 32;
1984         const u32 num_secondary_tile_mode_states = 16;
1985         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1986         u32 num_pipe_configs;
1987         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1988                 rdev->config.cik.max_shader_engines;
1989
1990         switch (rdev->config.cik.mem_row_size_in_kb) {
1991         case 1:
1992                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1993                 break;
1994         case 2:
1995         default:
1996                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1997                 break;
1998         case 4:
1999                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2000                 break;
2001         }
2002
2003         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2004         if (num_pipe_configs > 8)
2005                 num_pipe_configs = 16;
2006
2007         if (num_pipe_configs == 16) {
2008                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2009                         switch (reg_offset) {
2010                         case 0:
2011                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2012                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2013                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2014                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2015                                 break;
2016                         case 1:
2017                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2018                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2019                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2020                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2021                                 break;
2022                         case 2:
2023                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2024                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2025                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2026                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2027                                 break;
2028                         case 3:
2029                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2030                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2031                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2032                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2033                                 break;
2034                         case 4:
2035                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2036                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2037                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2038                                                  TILE_SPLIT(split_equal_to_row_size));
2039                                 break;
2040                         case 5:
2041                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2044                                 break;
2045                         case 6:
2046                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2047                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2048                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2049                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2050                                 break;
2051                         case 7:
2052                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2053                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2054                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2055                                                  TILE_SPLIT(split_equal_to_row_size));
2056                                 break;
2057                         case 8:
2058                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2059                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2060                                 break;
2061                         case 9:
2062                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2063                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2064                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2065                                 break;
2066                         case 10:
2067                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2069                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2070                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                                 break;
2072                         case 11:
2073                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2074                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2075                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2076                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077                                 break;
2078                         case 12:
2079                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2080                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2082                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083                                 break;
2084                         case 13:
2085                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2087                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2088                                 break;
2089                         case 14:
2090                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2093                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094                                 break;
2095                         case 16:
2096                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2097                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2098                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2099                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2100                                 break;
2101                         case 17:
2102                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2103                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2105                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2106                                 break;
2107                         case 27:
2108                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2110                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2111                                 break;
2112                         case 28:
2113                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2114                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2115                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2116                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2117                                 break;
2118                         case 29:
2119                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2120                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2121                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2122                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2123                                 break;
2124                         case 30:
2125                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2126                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2127                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2128                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2129                                 break;
2130                         default:
2131                                 gb_tile_moden = 0;
2132                                 break;
2133                         }
2134                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2135                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2136                 }
2137                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2138                         switch (reg_offset) {
2139                         case 0:
2140                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2141                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2144                                 break;
2145                         case 1:
2146                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2148                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2149                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2150                                 break;
2151                         case 2:
2152                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2154                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2155                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2156                                 break;
2157                         case 3:
2158                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2160                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2161                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2162                                 break;
2163                         case 4:
2164                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2166                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2167                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2168                                 break;
2169                         case 5:
2170                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2172                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2173                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2174                                 break;
2175                         case 6:
2176                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2178                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2179                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2180                                 break;
2181                         case 8:
2182                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2184                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2185                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2186                                 break;
2187                         case 9:
2188                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2190                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2192                                 break;
2193                         case 10:
2194                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2196                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2197                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2198                                 break;
2199                         case 11:
2200                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2201                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2202                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2203                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2204                                 break;
2205                         case 12:
2206                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2210                                 break;
2211                         case 13:
2212                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2214                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2215                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2216                                 break;
2217                         case 14:
2218                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2219                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2220                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2221                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2222                                 break;
2223                         default:
2224                                 gb_tile_moden = 0;
2225                                 break;
2226                         }
2227                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2228                 }
2229         } else if (num_pipe_configs == 8) {
2230                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2231                         switch (reg_offset) {
2232                         case 0:
2233                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2234                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2235                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2236                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2237                                 break;
2238                         case 1:
2239                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2241                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2242                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2243                                 break;
2244                         case 2:
2245                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2247                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2248                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2249                                 break;
2250                         case 3:
2251                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2252                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2253                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2254                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2255                                 break;
2256                         case 4:
2257                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2259                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2260                                                  TILE_SPLIT(split_equal_to_row_size));
2261                                 break;
2262                         case 5:
2263                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2264                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2265                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2266                                 break;
2267                         case 6:
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2270                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2271                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2272                                 break;
2273                         case 7:
2274                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2275                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2276                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2277                                                  TILE_SPLIT(split_equal_to_row_size));
2278                                 break;
2279                         case 8:
2280                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2282                                 break;
2283                         case 9:
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2285                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2286                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2287                                 break;
2288                         case 10:
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2292                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2293                                 break;
2294                         case 11:
2295                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2296                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2297                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2298                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299                                 break;
2300                         case 12:
2301                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2302                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2303                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2304                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305                                 break;
2306                         case 13:
2307                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2310                                 break;
2311                         case 14:
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2314                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2315                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2316                                 break;
2317                         case 16:
2318                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2321                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322                                 break;
2323                         case 17:
2324                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2325                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2327                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328                                 break;
2329                         case 27:
2330                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2332                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2333                                 break;
2334                         case 28:
2335                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2337                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2338                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339                                 break;
2340                         case 29:
2341                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2342                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2343                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2344                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345                                 break;
2346                         case 30:
2347                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2348                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2349                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2350                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351                                 break;
2352                         default:
2353                                 gb_tile_moden = 0;
2354                                 break;
2355                         }
2356                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2357                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2358                 }
2359                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2360                         switch (reg_offset) {
2361                         case 0:
2362                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2366                                 break;
2367                         case 1:
2368                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2372                                 break;
2373                         case 2:
2374                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2377                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2378                                 break;
2379                         case 3:
2380                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2383                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2384                                 break;
2385                         case 4:
2386                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2389                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2390                                 break;
2391                         case 5:
2392                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2394                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2396                                 break;
2397                         case 6:
2398                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2400                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2401                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2402                                 break;
2403                         case 8:
2404                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2407                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2408                                 break;
2409                         case 9:
2410                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2412                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2414                                 break;
2415                         case 10:
2416                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2418                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2419                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2420                                 break;
2421                         case 11:
2422                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2425                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2426                                 break;
2427                         case 12:
2428                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2432                                 break;
2433                         case 13:
2434                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2437                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2438                                 break;
2439                         case 14:
2440                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2442                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2443                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2444                                 break;
2445                         default:
2446                                 gb_tile_moden = 0;
2447                                 break;
2448                         }
2449                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2450                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2451                 }
2452         } else if (num_pipe_configs == 4) {
2453                 if (num_rbs == 4) {
2454                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2455                                 switch (reg_offset) {
2456                                 case 0:
2457                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2459                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2461                                         break;
2462                                 case 1:
2463                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2465                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2466                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2467                                         break;
2468                                 case 2:
2469                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2471                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2472                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2473                                         break;
2474                                 case 3:
2475                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2477                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2478                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2479                                         break;
2480                                 case 4:
2481                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2482                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2483                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2484                                                          TILE_SPLIT(split_equal_to_row_size));
2485                                         break;
2486                                 case 5:
2487                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2488                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2489                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2490                                         break;
2491                                 case 6:
2492                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2493                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2494                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2496                                         break;
2497                                 case 7:
2498                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2499                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2500                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2501                                                          TILE_SPLIT(split_equal_to_row_size));
2502                                         break;
2503                                 case 8:
2504                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2505                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2506                                         break;
2507                                 case 9:
2508                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2509                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2511                                         break;
2512                                 case 10:
2513                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2515                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2516                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517                                         break;
2518                                 case 11:
2519                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2521                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2522                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2523                                         break;
2524                                 case 12:
2525                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2526                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2527                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2528                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                                         break;
2530                                 case 13:
2531                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2532                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2534                                         break;
2535                                 case 14:
2536                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2538                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                                         break;
2541                                 case 16:
2542                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                                         break;
2547                                 case 17:
2548                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2550                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                                         break;
2553                                 case 27:
2554                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2557                                         break;
2558                                 case 28:
2559                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2561                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563                                         break;
2564                                 case 29:
2565                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2566                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2567                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569                                         break;
2570                                 case 30:
2571                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2572                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2573                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2574                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575                                         break;
2576                                 default:
2577                                         gb_tile_moden = 0;
2578                                         break;
2579                                 }
2580                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2581                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2582                         }
2583                 } else if (num_rbs < 4) {
2584                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2585                                 switch (reg_offset) {
2586                                 case 0:
2587                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2589                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2590                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2591                                         break;
2592                                 case 1:
2593                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2595                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2596                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2597                                         break;
2598                                 case 2:
2599                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2602                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603                                         break;
2604                                 case 3:
2605                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2607                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2609                                         break;
2610                                 case 4:
2611                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2613                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614                                                          TILE_SPLIT(split_equal_to_row_size));
2615                                         break;
2616                                 case 5:
2617                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2618                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2619                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620                                         break;
2621                                 case 6:
2622                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2623                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2624                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2625                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2626                                         break;
2627                                 case 7:
2628                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2629                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2630                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2631                                                          TILE_SPLIT(split_equal_to_row_size));
2632                                         break;
2633                                 case 8:
2634                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2635                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2636                                         break;
2637                                 case 9:
2638                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2639                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2640                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2641                                         break;
2642                                 case 10:
2643                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2645                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2646                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647                                         break;
2648                                 case 11:
2649                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2651                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2652                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2653                                         break;
2654                                 case 12:
2655                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2656                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2657                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659                                         break;
2660                                 case 13:
2661                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2662                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2664                                         break;
2665                                 case 14:
2666                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2669                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670                                         break;
2671                                 case 16:
2672                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2673                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2674                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2676                                         break;
2677                                 case 17:
2678                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2679                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2681                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2682                                         break;
2683                                 case 27:
2684                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2687                                         break;
2688                                 case 28:
2689                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2690                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2691                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2692                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693                                         break;
2694                                 case 29:
2695                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2696                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2697                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                                         break;
2700                                 case 30:
2701                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2703                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2704                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705                                         break;
2706                                 default:
2707                                         gb_tile_moden = 0;
2708                                         break;
2709                                 }
2710                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2711                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2712                         }
2713                 }
2714                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2715                         switch (reg_offset) {
2716                         case 0:
2717                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2719                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2720                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2721                                 break;
2722                         case 1:
2723                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2725                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2727                                 break;
2728                         case 2:
2729                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2731                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2732                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2733                                 break;
2734                         case 3:
2735                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2738                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2739                                 break;
2740                         case 4:
2741                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2743                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2744                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2745                                 break;
2746                         case 5:
2747                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2749                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2750                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2751                                 break;
2752                         case 6:
2753                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2756                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2757                                 break;
2758                         case 8:
2759                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2760                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2761                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2762                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2763                                 break;
2764                         case 9:
2765                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2766                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2768                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2769                                 break;
2770                         case 10:
2771                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2773                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2774                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2775                                 break;
2776                         case 11:
2777                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2779                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2780                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2781                                 break;
2782                         case 12:
2783                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2786                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2787                                 break;
2788                         case 13:
2789                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2792                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2793                                 break;
2794                         case 14:
2795                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2798                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2799                                 break;
2800                         default:
2801                                 gb_tile_moden = 0;
2802                                 break;
2803                         }
2804                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2805                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2806                 }
2807         } else if (num_pipe_configs == 2) {
2808                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2809                         switch (reg_offset) {
2810                         case 0:
2811                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2813                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2814                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2815                                 break;
2816                         case 1:
2817                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2819                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2820                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2821                                 break;
2822                         case 2:
2823                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2826                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2827                                 break;
2828                         case 3:
2829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2831                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2832                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2833                                 break;
2834                         case 4:
2835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2837                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2838                                                  TILE_SPLIT(split_equal_to_row_size));
2839                                 break;
2840                         case 5:
2841                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2842                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2843                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2844                                 break;
2845                         case 6:
2846                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2847                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2848                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2849                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2850                                 break;
2851                         case 7:
2852                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2853                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2854                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2855                                                  TILE_SPLIT(split_equal_to_row_size));
2856                                 break;
2857                         case 8:
2858                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2859                                                  PIPE_CONFIG(ADDR_SURF_P2));
2860                                 break;
2861                         case 9:
2862                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2863                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2864                                                  PIPE_CONFIG(ADDR_SURF_P2));
2865                                 break;
2866                         case 10:
2867                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2869                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2870                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871                                 break;
2872                         case 11:
2873                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2874                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2875                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2876                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2877                                 break;
2878                         case 12:
2879                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2880                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2881                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2882                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2883                                 break;
2884                         case 13:
2885                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2886                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2887                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2888                                 break;
2889                         case 14:
2890                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2893                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2894                                 break;
2895                         case 16:
2896                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2898                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2899                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2900                                 break;
2901                         case 17:
2902                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2903                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2904                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2905                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2906                                 break;
2907                         case 27:
2908                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910                                                  PIPE_CONFIG(ADDR_SURF_P2));
2911                                 break;
2912                         case 28:
2913                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2914                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2915                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2916                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                                 break;
2918                         case 29:
2919                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2920                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2921                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2922                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923                                 break;
2924                         case 30:
2925                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2927                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2928                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                                 break;
2930                         default:
2931                                 gb_tile_moden = 0;
2932                                 break;
2933                         }
2934                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2935                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2936                 }
2937                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2938                         switch (reg_offset) {
2939                         case 0:
2940                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2941                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2942                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2943                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2944                                 break;
2945                         case 1:
2946                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2947                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2948                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2949                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2950                                 break;
2951                         case 2:
2952                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2954                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2955                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2956                                 break;
2957                         case 3:
2958                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2959                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2960                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2962                                 break;
2963                         case 4:
2964                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2966                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2968                                 break;
2969                         case 5:
2970                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2973                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2974                                 break;
2975                         case 6:
2976                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2979                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2980                                 break;
2981                         case 8:
2982                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2983                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2984                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2986                                 break;
2987                         case 9:
2988                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2992                                 break;
2993                         case 10:
2994                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2995                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2998                                 break;
2999                         case 11:
3000                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3004                                 break;
3005                         case 12:
3006                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3008                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3010                                 break;
3011                         case 13:
3012                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3016                                 break;
3017                         case 14:
3018                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3020                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3021                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3022                                 break;
3023                         default:
3024                                 gb_tile_moden = 0;
3025                                 break;
3026                         }
3027                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3028                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3029                 }
3030         } else
3031                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3032 }
3033
3034 /**
3035  * cik_select_se_sh - select which SE, SH to address
3036  *
3037  * @rdev: radeon_device pointer
3038  * @se_num: shader engine to address
3039  * @sh_num: sh block to address
3040  *
3041  * Select which SE, SH combinations to address. Certain
3042  * registers are instanced per SE or SH.  0xffffffff means
3043  * broadcast to all SEs or SHs (CIK).
3044  */
3045 static void cik_select_se_sh(struct radeon_device *rdev,
3046                              u32 se_num, u32 sh_num)
3047 {
3048         u32 data = INSTANCE_BROADCAST_WRITES;
3049
3050         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3051                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3052         else if (se_num == 0xffffffff)
3053                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3054         else if (sh_num == 0xffffffff)
3055                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3056         else
3057                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3058         WREG32(GRBM_GFX_INDEX, data);
3059 }
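
/*
 * Illustrative usage sketch only (not part of the upstream flow): read one
 * per-SH instanced register for a single SE/SH pair and then restore
 * broadcast mode, the same pattern cik_setup_rb() follows below.  The local
 * names se, sh and val are assumptions made for this example.
 *
 *	u32 val;
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	val = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */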
3060
3061 /**
3062  * cik_create_bitmask - create a bitmask
3063  *
3064  * @bit_width: length of the mask
3065  *
3066  * Create a variable-length bit mask with @bit_width low bits set (CIK).
3067  * Returns the bitmask.
3068  */
3069 static u32 cik_create_bitmask(u32 bit_width)
3070 {
3071         u32 i, mask = 0;
3072
3073         for (i = 0; i < bit_width; i++) {
3074                 mask <<= 1;
3075                 mask |= 1;
3076         }
3077         return mask;
3078 }
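
/*
 * Comment-only worked example: cik_create_bitmask(4) returns 0xf, i.e. the
 * low @bit_width bits set, matching ((1u << bit_width) - 1) for widths below
 * 32.  The loop form also stays well defined at bit_width == 32 (returns
 * 0xffffffff), where a single 32-bit shift would be undefined.
 */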
3079
3080 /**
3081  * cik_get_rb_disabled - computes the mask of disabled RBs
3082  *
3083  * @rdev: radeon_device pointer
3084  * @max_rb_num_per_se: max RBs (render backends) per shader engine
3085  * @sh_per_se: number of SH blocks per SE for the asic
3087  *
3088  * Calculates the bitmask of disabled RBs (CIK).
3089  * Returns the disabled RB bitmask.
3090  */
3091 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3092                               u32 max_rb_num_per_se,
3093                               u32 sh_per_se)
3094 {
3095         u32 data, mask;
3096
3097         data = RREG32(CC_RB_BACKEND_DISABLE);
3098         if (data & 1)
3099                 data &= BACKEND_DISABLE_MASK;
3100         else
3101                 data = 0;
3102         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3103
3104         data >>= BACKEND_DISABLE_SHIFT;
3105
3106         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3107
3108         return data & mask;
3109 }
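
/*
 * Comment-only worked example: with max_rb_num_per_se = 2 and sh_per_se = 1
 * (a Bonaire-like configuration from cik_gpu_init() below), the mask is
 * cik_create_bitmask(2) = 0x3, so the value returned is the low two bits of
 * the combined CC/GC_USER backend-disable field for the currently selected
 * SE/SH, one bit per render backend.
 */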
3110
3111 /**
3112  * cik_setup_rb - setup the RBs on the asic
3113  *
3114  * @rdev: radeon_device pointer
3115  * @se_num: number of SEs (shader engines) for the asic
3116  * @sh_per_se: number of SH blocks per SE for the asic
3117  * @max_rb_num_per_se: max RBs (render backends) per shader engine
3118  *
3119  * Configures per-SE/SH RB registers (CIK).
3120  */
3121 static void cik_setup_rb(struct radeon_device *rdev,
3122                          u32 se_num, u32 sh_per_se,
3123                          u32 max_rb_num_per_se)
3124 {
3125         int i, j;
3126         u32 data, mask;
3127         u32 disabled_rbs = 0;
3128         u32 enabled_rbs = 0;
3129
3130         for (i = 0; i < se_num; i++) {
3131                 for (j = 0; j < sh_per_se; j++) {
3132                         cik_select_se_sh(rdev, i, j);
3133                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3134                         if (rdev->family == CHIP_HAWAII)
3135                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3136                         else
3137                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3138                 }
3139         }
3140         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3141
3142         mask = 1;
3143         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3144                 if (!(disabled_rbs & mask))
3145                         enabled_rbs |= mask;
3146                 mask <<= 1;
3147         }
3148
3149         rdev->config.cik.backend_enable_mask = enabled_rbs;
3150
3151         for (i = 0; i < se_num; i++) {
3152                 cik_select_se_sh(rdev, i, 0xffffffff);
3153                 data = 0;
3154                 for (j = 0; j < sh_per_se; j++) {
3155                         switch (enabled_rbs & 3) {
3156                         case 0:
3157                                 if (j == 0)
3158                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3159                                 else
3160                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3161                                 break;
3162                         case 1:
3163                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3164                                 break;
3165                         case 2:
3166                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3167                                 break;
3168                         case 3:
3169                         default:
3170                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3171                                 break;
3172                         }
3173                         enabled_rbs >>= 2;
3174                 }
3175                 WREG32(PA_SC_RASTER_CONFIG, data);
3176         }
3177         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3178 }
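
/*
 * Comment-only example of the packing above, assuming a Bonaire-like part
 * (se_num = 2, sh_per_se = 1, max_rb_num_per_se = 2): each SH contributes a
 * 2-bit disabled-RB mask shifted into its own CIK_RB_BITMAP_WIDTH_PER_SH
 * wide field of disabled_rbs, and enabled_rbs is the complement taken over
 * max_rb_num_per_se * se_num = 4 bits before being stored in
 * rdev->config.cik.backend_enable_mask and used to build
 * PA_SC_RASTER_CONFIG.
 */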
3179
3180 /**
3181  * cik_gpu_init - setup the 3D engine
3182  *
3183  * @rdev: radeon_device pointer
3184  *
3185  * Configures the 3D engine and tiling configuration
3186  * registers so that the 3D engine is usable.
3187  */
3188 static void cik_gpu_init(struct radeon_device *rdev)
3189 {
3190         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3191         u32 mc_shared_chmap, mc_arb_ramcfg;
3192         u32 hdp_host_path_cntl;
3193         u32 tmp;
3194         int i, j;
3195
3196         switch (rdev->family) {
3197         case CHIP_BONAIRE:
3198                 rdev->config.cik.max_shader_engines = 2;
3199                 rdev->config.cik.max_tile_pipes = 4;
3200                 rdev->config.cik.max_cu_per_sh = 7;
3201                 rdev->config.cik.max_sh_per_se = 1;
3202                 rdev->config.cik.max_backends_per_se = 2;
3203                 rdev->config.cik.max_texture_channel_caches = 4;
3204                 rdev->config.cik.max_gprs = 256;
3205                 rdev->config.cik.max_gs_threads = 32;
3206                 rdev->config.cik.max_hw_contexts = 8;
3207
3208                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3209                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3210                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3211                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3212                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3213                 break;
3214         case CHIP_HAWAII:
3215                 rdev->config.cik.max_shader_engines = 4;
3216                 rdev->config.cik.max_tile_pipes = 16;
3217                 rdev->config.cik.max_cu_per_sh = 11;
3218                 rdev->config.cik.max_sh_per_se = 1;
3219                 rdev->config.cik.max_backends_per_se = 4;
3220                 rdev->config.cik.max_texture_channel_caches = 16;
3221                 rdev->config.cik.max_gprs = 256;
3222                 rdev->config.cik.max_gs_threads = 32;
3223                 rdev->config.cik.max_hw_contexts = 8;
3224
3225                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3226                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3227                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3228                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3229                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3230                 break;
3231         case CHIP_KAVERI:
3232                 rdev->config.cik.max_shader_engines = 1;
3233                 rdev->config.cik.max_tile_pipes = 4;
3234                 if ((rdev->pdev->device == 0x1304) ||
3235                     (rdev->pdev->device == 0x1305) ||
3236                     (rdev->pdev->device == 0x130C) ||
3237                     (rdev->pdev->device == 0x130F) ||
3238                     (rdev->pdev->device == 0x1310) ||
3239                     (rdev->pdev->device == 0x1311) ||
3240                     (rdev->pdev->device == 0x131C)) {
3241                         rdev->config.cik.max_cu_per_sh = 8;
3242                         rdev->config.cik.max_backends_per_se = 2;
3243                 } else if ((rdev->pdev->device == 0x1309) ||
3244                            (rdev->pdev->device == 0x130A) ||
3245                            (rdev->pdev->device == 0x130D) ||
3246                            (rdev->pdev->device == 0x1313) ||
3247                            (rdev->pdev->device == 0x131D)) {
3248                         rdev->config.cik.max_cu_per_sh = 6;
3249                         rdev->config.cik.max_backends_per_se = 2;
3250                 } else if ((rdev->pdev->device == 0x1306) ||
3251                            (rdev->pdev->device == 0x1307) ||
3252                            (rdev->pdev->device == 0x130B) ||
3253                            (rdev->pdev->device == 0x130E) ||
3254                            (rdev->pdev->device == 0x1315) ||
3255                            (rdev->pdev->device == 0x131B)) {
3256                         rdev->config.cik.max_cu_per_sh = 4;
3257                         rdev->config.cik.max_backends_per_se = 1;
3258                 } else {
3259                         rdev->config.cik.max_cu_per_sh = 3;
3260                         rdev->config.cik.max_backends_per_se = 1;
3261                 }
3262                 rdev->config.cik.max_sh_per_se = 1;
3263                 rdev->config.cik.max_texture_channel_caches = 4;
3264                 rdev->config.cik.max_gprs = 256;
3265                 rdev->config.cik.max_gs_threads = 16;
3266                 rdev->config.cik.max_hw_contexts = 8;
3267
3268                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3269                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3270                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3271                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3272                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3273                 break;
3274         case CHIP_KABINI:
3275         default:
3276                 rdev->config.cik.max_shader_engines = 1;
3277                 rdev->config.cik.max_tile_pipes = 2;
3278                 rdev->config.cik.max_cu_per_sh = 2;
3279                 rdev->config.cik.max_sh_per_se = 1;
3280                 rdev->config.cik.max_backends_per_se = 1;
3281                 rdev->config.cik.max_texture_channel_caches = 2;
3282                 rdev->config.cik.max_gprs = 256;
3283                 rdev->config.cik.max_gs_threads = 16;
3284                 rdev->config.cik.max_hw_contexts = 8;
3285
3286                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3287                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3288                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3289                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3290                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3291                 break;
3292         }
3293
3294         /* Initialize HDP */
3295         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3296                 WREG32((0x2c14 + j), 0x00000000);
3297                 WREG32((0x2c18 + j), 0x00000000);
3298                 WREG32((0x2c1c + j), 0x00000000);
3299                 WREG32((0x2c20 + j), 0x00000000);
3300                 WREG32((0x2c24 + j), 0x00000000);
3301         }
3302
3303         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3304
3305         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3306
3307         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3308         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3309
3310         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3311         rdev->config.cik.mem_max_burst_length_bytes = 256;
3312         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3313         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3314         if (rdev->config.cik.mem_row_size_in_kb > 4)
3315                 rdev->config.cik.mem_row_size_in_kb = 4;
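        /*
         * Comment-only arithmetic example: a NOOFCOLS field of 0 gives
         * 4 * (1 << 8) / 1024 = 1 KB rows and a field of 2 gives 4 KB,
         * which is also the upper clamp applied above.
         */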
3316         /* XXX use MC settings? */
3317         rdev->config.cik.shader_engine_tile_size = 32;
3318         rdev->config.cik.num_gpus = 1;
3319         rdev->config.cik.multi_gpu_tile_size = 64;
3320
3321         /* fix up row size */
3322         gb_addr_config &= ~ROW_SIZE_MASK;
3323         switch (rdev->config.cik.mem_row_size_in_kb) {
3324         case 1:
3325         default:
3326                 gb_addr_config |= ROW_SIZE(0);
3327                 break;
3328         case 2:
3329                 gb_addr_config |= ROW_SIZE(1);
3330                 break;
3331         case 4:
3332                 gb_addr_config |= ROW_SIZE(2);
3333                 break;
3334         }
3335
3336         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3337          * not have bank info, so create a custom tiling dword.
3338          * bits 3:0   num_pipes
3339          * bits 7:4   num_banks
3340          * bits 11:8  group_size
3341          * bits 15:12 row_size
3342          */
3343         rdev->config.cik.tile_config = 0;
3344         switch (rdev->config.cik.num_tile_pipes) {
3345         case 1:
3346                 rdev->config.cik.tile_config |= (0 << 0);
3347                 break;
3348         case 2:
3349                 rdev->config.cik.tile_config |= (1 << 0);
3350                 break;
3351         case 4:
3352                 rdev->config.cik.tile_config |= (2 << 0);
3353                 break;
3354         case 8:
3355         default:
3356                 /* XXX what about 12? */
3357                 rdev->config.cik.tile_config |= (3 << 0);
3358                 break;
3359         }
3360         rdev->config.cik.tile_config |=
3361                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3362         rdev->config.cik.tile_config |=
3363                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3364         rdev->config.cik.tile_config |=
3365                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3366
3367         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3368         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3369         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3370         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3371         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3372         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3373         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3374         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3375
3376         cik_tiling_mode_table_init(rdev);
3377
3378         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3379                      rdev->config.cik.max_sh_per_se,
3380                      rdev->config.cik.max_backends_per_se);
3381
3382         /* set HW defaults for 3D engine */
3383         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3384
3385         WREG32(SX_DEBUG_1, 0x20);
3386
3387         WREG32(TA_CNTL_AUX, 0x00010000);
3388
3389         tmp = RREG32(SPI_CONFIG_CNTL);
3390         tmp |= 0x03000000;
3391         WREG32(SPI_CONFIG_CNTL, tmp);
3392
3393         WREG32(SQ_CONFIG, 1);
3394
3395         WREG32(DB_DEBUG, 0);
3396
3397         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3398         tmp |= 0x00000400;
3399         WREG32(DB_DEBUG2, tmp);
3400
3401         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3402         tmp |= 0x00020200;
3403         WREG32(DB_DEBUG3, tmp);
3404
3405         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3406         tmp |= 0x00018208;
3407         WREG32(CB_HW_CONTROL, tmp);
3408
3409         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3410
3411         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3412                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3413                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3414                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3415
3416         WREG32(VGT_NUM_INSTANCES, 1);
3417
3418         WREG32(CP_PERFMON_CNTL, 0);
3419
3420         WREG32(SQ_CONFIG, 0);
3421
3422         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3423                                           FORCE_EOV_MAX_REZ_CNT(255)));
3424
3425         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3426                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3427
3428         WREG32(VGT_GS_VERTEX_REUSE, 16);
3429         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3430
3431         tmp = RREG32(HDP_MISC_CNTL);
3432         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3433         WREG32(HDP_MISC_CNTL, tmp);
3434
3435         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3436         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3437
3438         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3439         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3440
3441         udelay(50);
3442 }
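/*
 * Illustrative sketch, not part of the driver: helpers showing how the
 * custom tiling dword packed at the end of cik_gpu_init() above could be
 * unpacked again.  The pipe accessor mirrors the encode switch
 * (0/1/2/3 -> 1/2/4/8 pipes); the other accessors simply return the raw
 * register fields that were shifted into bits 7:4, 11:8 and 15:12.
 */
static inline u32 __maybe_unused cik_tile_config_num_pipes(u32 tile_config)
{
        return 1 << (tile_config & 0xf);        /* bits 3:0 */
}

static inline u32 __maybe_unused cik_tile_config_raw_banks(u32 tile_config)
{
        return (tile_config >> 4) & 0xf;        /* bits 7:4: raw NOOFBANK field */
}

static inline u32 __maybe_unused cik_tile_config_raw_row_size(u32 tile_config)
{
        return (tile_config >> 12) & 0xf;       /* bits 15:12: raw ROW_SIZE field */
}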
3443
3444 /*
3445  * GPU scratch register helper functions.
3446  */
3447 /**
3448  * cik_scratch_init - setup driver info for CP scratch regs
3449  *
3450  * @rdev: radeon_device pointer
3451  *
3452  * Set up the number and offset of the CP scratch registers.
3453  * NOTE: use of CP scratch registers is a legacy interface and
3454  * is not used by default on newer asics (r6xx+).  On newer asics,
3455  * memory buffers are used for fences rather than scratch regs.
3456  */
3457 static void cik_scratch_init(struct radeon_device *rdev)
3458 {
3459         int i;
3460
3461         rdev->scratch.num_reg = 7;
3462         rdev->scratch.reg_base = SCRATCH_REG0;
3463         for (i = 0; i < rdev->scratch.num_reg; i++) {
3464                 rdev->scratch.free[i] = true;
3465                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3466         }
3467 }
3468
3469 /**
3470  * cik_ring_test - basic gfx ring test
3471  *
3472  * @rdev: radeon_device pointer
3473  * @ring: radeon_ring structure holding ring information
3474  *
3475  * Allocate a scratch register and write to it using the gfx ring (CIK).
3476  * Provides a basic gfx ring test to verify that the ring is working.
3477  * Used by cik_cp_gfx_resume().
3478  * Returns 0 on success, error on failure.
3479  */
3480 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3481 {
3482         uint32_t scratch;
3483         uint32_t tmp = 0;
3484         unsigned i;
3485         int r;
3486
3487         r = radeon_scratch_get(rdev, &scratch);
3488         if (r) {
3489                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3490                 return r;
3491         }
3492         WREG32(scratch, 0xCAFEDEAD);
3493         r = radeon_ring_lock(rdev, ring, 3);
3494         if (r) {
3495                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3496                 radeon_scratch_free(rdev, scratch);
3497                 return r;
3498         }
3499         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3500         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3501         radeon_ring_write(ring, 0xDEADBEEF);
3502         radeon_ring_unlock_commit(rdev, ring);
3503
3504         for (i = 0; i < rdev->usec_timeout; i++) {
3505                 tmp = RREG32(scratch);
3506                 if (tmp == 0xDEADBEEF)
3507                         break;
3508                 DRM_UDELAY(1);
3509         }
3510         if (i < rdev->usec_timeout) {
3511                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3512         } else {
3513                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3514                           ring->idx, scratch, tmp);
3515                 r = -EINVAL;
3516         }
3517         radeon_scratch_free(rdev, scratch);
3518         return r;
3519 }
3520
3521 /**
3522  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3523  *
3524  * @rdev: radeon_device pointer
3525  * @ridx: radeon ring index
3526  *
3527  * Emits an hdp flush on the cp.
3528  */
3529 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3530                                        int ridx)
3531 {
3532         struct radeon_ring *ring = &rdev->ring[ridx];
3533         u32 ref_and_mask;
3534
3535         switch (ring->idx) {
3536         case CAYMAN_RING_TYPE_CP1_INDEX:
3537         case CAYMAN_RING_TYPE_CP2_INDEX:
3538         default:
3539                 switch (ring->me) {
3540                 case 0:
3541                         ref_and_mask = CP2 << ring->pipe;
3542                         break;
3543                 case 1:
3544                         ref_and_mask = CP6 << ring->pipe;
3545                         break;
3546                 default:
3547                         return;
3548                 }
3549                 break;
3550         case RADEON_RING_TYPE_GFX_INDEX:
3551                 ref_and_mask = CP0;
3552                 break;
3553         }
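        /*
         * Worked example (illustrative): the gfx ring always uses CP0,
         * while a compute queue on ME1, pipe 2 uses CP2 << 2 and one on
         * ME2, pipe 1 uses CP6 << 1.  This assumes CP0/CP2/CP6 are
         * single-bit masks in GPU_HDP_FLUSH_REQ/DONE, which matches how
         * they are shifted above: the packet below writes the request
         * bit and then polls the done register for the same bit.
         */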
3554
3555         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3556         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3557                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3558                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3559         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3560         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3561         radeon_ring_write(ring, ref_and_mask);
3562         radeon_ring_write(ring, ref_and_mask);
3563         radeon_ring_write(ring, 0x20); /* poll interval */
3564 }
3565
3566 /**
3567  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3568  *
3569  * @rdev: radeon_device pointer
3570  * @fence: radeon fence object
3571  *
3572  * Emits a fence sequence number on the gfx ring and flushes
3573  * GPU caches.
3574  */
3575 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3576                              struct radeon_fence *fence)
3577 {
3578         struct radeon_ring *ring = &rdev->ring[fence->ring];
3579         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3580
3581         /* EVENT_WRITE_EOP - flush caches, send int */
3582         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3583         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3584                                  EOP_TC_ACTION_EN |
3585                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3586                                  EVENT_INDEX(5)));
3587         radeon_ring_write(ring, addr & 0xfffffffc);
3588         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
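        /*
         * Interpretation (based on how other radeon fence paths use these
         * fields, so treat as an assumption): DATA_SEL(1) selects a 32-bit
         * data write -- the fence sequence number emitted next -- and
         * INT_SEL(2) raises an interrupt once that write is confirmed.
         */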
3589         radeon_ring_write(ring, fence->seq);
3590         radeon_ring_write(ring, 0);
3591         /* HDP flush */
3592         cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3593 }
3594
3595 /**
3596  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3597  *
3598  * @rdev: radeon_device pointer
3599  * @fence: radeon fence object
3600  *
3601  * Emits a fence sequence number on the compute ring and flushes
3602  * GPU caches.
3603  */
3604 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3605                                  struct radeon_fence *fence)
3606 {
3607         struct radeon_ring *ring = &rdev->ring[fence->ring];
3608         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3609
3610         /* RELEASE_MEM - flush caches, send int */
3611         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3612         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3613                                  EOP_TC_ACTION_EN |
3614                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3615                                  EVENT_INDEX(5)));
3616         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3617         radeon_ring_write(ring, addr & 0xfffffffc);
3618         radeon_ring_write(ring, upper_32_bits(addr));
3619         radeon_ring_write(ring, fence->seq);
3620         radeon_ring_write(ring, 0);
3621         /* HDP flush */
3622         cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3623 }
3624
3625 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3626                              struct radeon_ring *ring,
3627                              struct radeon_semaphore *semaphore,
3628                              bool emit_wait)
3629 {
3630         uint64_t addr = semaphore->gpu_addr;
3631         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3632
3633         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3634         radeon_ring_write(ring, addr & 0xffffffff);
3635         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3636
3637         return true;
3638 }
3639
3640 /**
3641  * cik_copy_cpdma - copy pages using the CP DMA engine
3642  *
3643  * @rdev: radeon_device pointer
3644  * @src_offset: src GPU address
3645  * @dst_offset: dst GPU address
3646  * @num_gpu_pages: number of GPU pages to xfer
3647  * @fence: radeon fence object
3648  *
3649  * Copy GPU pages using the CP DMA engine (CIK+).
3650  * Used by the radeon ttm implementation to move pages if
3651  * registered as the asic copy callback.
3652  */
3653 int cik_copy_cpdma(struct radeon_device *rdev,
3654                    uint64_t src_offset, uint64_t dst_offset,
3655                    unsigned num_gpu_pages,
3656                    struct radeon_fence **fence)
3657 {
3658         struct radeon_semaphore *sem = NULL;
3659         int ring_index = rdev->asic->copy.blit_ring_index;
3660         struct radeon_ring *ring = &rdev->ring[ring_index];
3661         u32 size_in_bytes, cur_size_in_bytes, control;
3662         int i, num_loops;
3663         int r = 0;
3664
3665         r = radeon_semaphore_create(rdev, &sem);
3666         if (r) {
3667                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3668                 return r;
3669         }
3670
3671         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3672         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
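        /*
         * Illustrative numbers: each DMA_DATA packet moves at most
         * 0x1fffff bytes, so a 1 MiB copy (256 GPU pages) needs a single
         * 7-dword packet; the 18 extra dwords reserved below leave room
         * for the semaphore sync and fence packets emitted around the copy.
         */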
3673         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3674         if (r) {
3675                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3676                 radeon_semaphore_free(rdev, &sem, NULL);
3677                 return r;
3678         }
3679
3680         radeon_semaphore_sync_to(sem, *fence);
3681         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3682
3683         for (i = 0; i < num_loops; i++) {
3684                 cur_size_in_bytes = size_in_bytes;
3685                 if (cur_size_in_bytes > 0x1fffff)
3686                         cur_size_in_bytes = 0x1fffff;
3687                 size_in_bytes -= cur_size_in_bytes;
3688                 control = 0;
3689                 if (size_in_bytes == 0)
3690                         control |= PACKET3_DMA_DATA_CP_SYNC;
3691                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3692                 radeon_ring_write(ring, control);
3693                 radeon_ring_write(ring, lower_32_bits(src_offset));
3694                 radeon_ring_write(ring, upper_32_bits(src_offset));
3695                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3696                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3697                 radeon_ring_write(ring, cur_size_in_bytes);
3698                 src_offset += cur_size_in_bytes;
3699                 dst_offset += cur_size_in_bytes;
3700         }
3701
3702         r = radeon_fence_emit(rdev, fence, ring->idx);
3703         if (r) {
3704                 radeon_ring_unlock_undo(rdev, ring);
3705                 return r;
3706         }
3707
3708         radeon_ring_unlock_commit(rdev, ring);
3709         radeon_semaphore_free(rdev, &sem, *fence);
3710
3711         return r;
3712 }
3713
3714 /*
3715  * IB stuff
3716  */
3717 /**
3718  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ib: radeon indirect buffer object
3722  *
3723  * Emits a DE (drawing engine) or CE (constant engine) IB
3724  * on the gfx ring.  IBs are usually generated by userspace
3725  * acceleration drivers and submitted to the kernel for
3726  * scheduling on the ring.  This function schedules the IB
3727  * on the gfx ring for execution by the GPU.
3728  */
3729 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3730 {
3731         struct radeon_ring *ring = &rdev->ring[ib->ring];
3732         u32 header, control = INDIRECT_BUFFER_VALID;
3733
3734         if (ib->is_const_ib) {
3735                 /* set switch buffer packet before const IB */
3736                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3737                 radeon_ring_write(ring, 0);
3738
3739                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3740         } else {
3741                 u32 next_rptr;
3742                 if (ring->rptr_save_reg) {
3743                         next_rptr = ring->wptr + 3 + 4;
3744                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3745                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3746                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3747                         radeon_ring_write(ring, next_rptr);
3748                 } else if (rdev->wb.enabled) {
3749                         next_rptr = ring->wptr + 5 + 4;
3750                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3751                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3752                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3753                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3754                         radeon_ring_write(ring, next_rptr);
3755                 }
3756
3757                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3758         }
3759
3760         control |= ib->length_dw |
3761                 (ib->vm ? (ib->vm->id << 24) : 0);
3762
3763         radeon_ring_write(ring, header);
3764         radeon_ring_write(ring,
3765 #ifdef __BIG_ENDIAN
3766                           (2 << 0) |
3767 #endif
3768                           (ib->gpu_addr & 0xFFFFFFFC));
3769         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770         radeon_ring_write(ring, control);
3771 }
3772
3773 /**
3774  * cik_ib_test - basic gfx ring IB test
3775  *
3776  * @rdev: radeon_device pointer
3777  * @ring: radeon_ring structure holding ring information
3778  *
3779  * Allocate an IB and execute it on the gfx ring (CIK).
3780  * Provides a basic gfx ring test to verify that IBs are working.
3781  * Returns 0 on success, error on failure.
3782  */
3783 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784 {
3785         struct radeon_ib ib;
3786         uint32_t scratch;
3787         uint32_t tmp = 0;
3788         unsigned i;
3789         int r;
3790
3791         r = radeon_scratch_get(rdev, &scratch);
3792         if (r) {
3793                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794                 return r;
3795         }
3796         WREG32(scratch, 0xCAFEDEAD);
3797         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798         if (r) {
3799                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800                 radeon_scratch_free(rdev, scratch);
3801                 return r;
3802         }
3803         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805         ib.ptr[2] = 0xDEADBEEF;
3806         ib.length_dw = 3;
3807         r = radeon_ib_schedule(rdev, &ib, NULL);
3808         if (r) {
3809                 radeon_scratch_free(rdev, scratch);
3810                 radeon_ib_free(rdev, &ib);
3811                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812                 return r;
3813         }
3814         r = radeon_fence_wait(ib.fence, false);
3815         if (r) {
3816                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3817                 radeon_scratch_free(rdev, scratch);
3818                 radeon_ib_free(rdev, &ib);
3819                 return r;
3820         }
3821         for (i = 0; i < rdev->usec_timeout; i++) {
3822                 tmp = RREG32(scratch);
3823                 if (tmp == 0xDEADBEEF)
3824                         break;
3825                 DRM_UDELAY(1);
3826         }
3827         if (i < rdev->usec_timeout) {
3828                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3829         } else {
3830                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3831                           scratch, tmp);
3832                 r = -EINVAL;
3833         }
3834         radeon_scratch_free(rdev, scratch);
3835         radeon_ib_free(rdev, &ib);
3836         return r;
3837 }
3838
3839 /*
3840  * CP.
3841  * On CIK, gfx and compute now have independent command processors.
3842  *
3843  * GFX
3844  * Gfx consists of a single ring and can process both gfx jobs and
3845  * compute jobs.  The gfx CP consists of three microengines (ME):
3846  * PFP - Pre-Fetch Parser
3847  * ME - Micro Engine
3848  * CE - Constant Engine
3849  * The PFP and ME make up what is considered the Drawing Engine (DE).
3850  * The CE is an asynchronous engine used for updating buffer descriptors
3851  * used by the DE so that they can be loaded into cache in parallel
3852  * while the DE is processing state update packets.
3853  *
3854  * Compute
3855  * The compute CP consists of two microengines (ME):
3856  * MEC1 - Compute MicroEngine 1
3857  * MEC2 - Compute MicroEngine 2
3858  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3859  * The queues are exposed to userspace and are programmed directly
3860  * by the compute runtime.
3861  */
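/*
 * Minimal sketch under stated assumptions (not a mapping the driver
 * defines): one way to flatten the (ME, pipe, queue) triple described
 * above into a single index in the 0..(num_queue - 1) range, using the
 * same numbering the code below uses (MEC1 == me 1, 4 pipes per MEC,
 * 8 queues per pipe).
 */
static inline u32 __maybe_unused cik_flat_queue_index(u32 me, u32 pipe, u32 queue)
{
        return ((me - 1) * 4 + pipe) * 8 + queue;
}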
3862 /**
3863  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3864  *
3865  * @rdev: radeon_device pointer
3866  * @enable: enable or disable the MEs
3867  *
3868  * Halts or unhalts the gfx MEs.
3869  */
3870 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3871 {
3872         if (enable)
3873                 WREG32(CP_ME_CNTL, 0);
3874         else {
3875                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3876                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3877                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3878                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3879         }
3880         udelay(50);
3881 }
3882
3883 /**
3884  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3885  *
3886  * @rdev: radeon_device pointer
3887  *
3888  * Loads the gfx PFP, ME, and CE ucode.
3889  * Returns 0 for success, -EINVAL if the ucode is not available.
3890  */
3891 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3892 {
3893         const __be32 *fw_data;
3894         int i;
3895
3896         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3897                 return -EINVAL;
3898
3899         cik_cp_gfx_enable(rdev, false);
3900
3901         /* PFP */
3902         fw_data = (const __be32 *)rdev->pfp_fw->data;
3903         WREG32(CP_PFP_UCODE_ADDR, 0);
3904         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3905                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3906         WREG32(CP_PFP_UCODE_ADDR, 0);
3907
3908         /* CE */
3909         fw_data = (const __be32 *)rdev->ce_fw->data;
3910         WREG32(CP_CE_UCODE_ADDR, 0);
3911         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3912                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3913         WREG32(CP_CE_UCODE_ADDR, 0);
3914
3915         /* ME */
3916         fw_data = (const __be32 *)rdev->me_fw->data;
3917         WREG32(CP_ME_RAM_WADDR, 0);
3918         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3919                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3920         WREG32(CP_ME_RAM_WADDR, 0);
3921
3922         WREG32(CP_PFP_UCODE_ADDR, 0);
3923         WREG32(CP_CE_UCODE_ADDR, 0);
3924         WREG32(CP_ME_RAM_WADDR, 0);
3925         WREG32(CP_ME_RAM_RADDR, 0);
3926         return 0;
3927 }
3928
3929 /**
3930  * cik_cp_gfx_start - start the gfx ring
3931  *
3932  * @rdev: radeon_device pointer
3933  *
3934  * Enables the ring and loads the clear state context and other
3935  * packets required to init the ring.
3936  * Returns 0 for success, error for failure.
3937  */
3938 static int cik_cp_gfx_start(struct radeon_device *rdev)
3939 {
3940         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3941         int r, i;
3942
3943         /* init the CP */
3944         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3945         WREG32(CP_ENDIAN_SWAP, 0);
3946         WREG32(CP_DEVICE_ID, 1);
3947
3948         cik_cp_gfx_enable(rdev, true);
3949
3950         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3951         if (r) {
3952                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3953                 return r;
3954         }
3955
3956         /* init the CE partitions.  CE only used for gfx on CIK */
3957         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3958         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3959         radeon_ring_write(ring, 0xc000);
3960         radeon_ring_write(ring, 0xc000);
3961
3962         /* setup clear context state */
3963         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3964         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3965
3966         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3967         radeon_ring_write(ring, 0x80000000);
3968         radeon_ring_write(ring, 0x80000000);
3969
3970         for (i = 0; i < cik_default_size; i++)
3971                 radeon_ring_write(ring, cik_default_state[i]);
3972
3973         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3974         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3975
3976         /* set clear context state */
3977         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3978         radeon_ring_write(ring, 0);
3979
3980         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3981         radeon_ring_write(ring, 0x00000316);
3982         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3983         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3984
3985         radeon_ring_unlock_commit(rdev, ring);
3986
3987         return 0;
3988 }
3989
3990 /**
3991  * cik_cp_gfx_fini - stop the gfx ring
3992  *
3993  * @rdev: radeon_device pointer
3994  *
3995  * Stop the gfx ring and tear down the driver ring
3996  * info.
3997  */
3998 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3999 {
4000         cik_cp_gfx_enable(rdev, false);
4001         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4002 }
4003
4004 /**
4005  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4006  *
4007  * @rdev: radeon_device pointer
4008  *
4009  * Program the location and size of the gfx ring buffer
4010  * and test it to make sure it's working.
4011  * Returns 0 for success, error for failure.
4012  */
4013 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4014 {
4015         struct radeon_ring *ring;
4016         u32 tmp;
4017         u32 rb_bufsz;
4018         u64 rb_addr;
4019         int r;
4020
4021         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4022         if (rdev->family != CHIP_HAWAII)
4023                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4024
4025         /* Set the write pointer delay */
4026         WREG32(CP_RB_WPTR_DELAY, 0);
4027
4028         /* set the RB to use vmid 0 */
4029         WREG32(CP_RB_VMID, 0);
4030
4031         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4032
4033         /* ring 0 - compute and gfx */
4034         /* Set ring buffer size */
4035         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4036         rb_bufsz = order_base_2(ring->ring_size / 8);
4037         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
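        /*
         * Worked example (illustrative): for a 1 MiB ring, ring_size / 8 =
         * 131072 = 2^17, so rb_bufsz = 17; with 4 KiB GPU pages,
         * RADEON_GPU_PAGE_SIZE / 8 = 512 = 2^9, so tmp = (9 << 8) | 17
         * before the optional big-endian swap flag is ORed in.
         */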
4038 #ifdef __BIG_ENDIAN
4039         tmp |= BUF_SWAP_32BIT;
4040 #endif
4041         WREG32(CP_RB0_CNTL, tmp);
4042
4043         /* Initialize the ring buffer's read and write pointers */
4044         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4045         ring->wptr = 0;
4046         WREG32(CP_RB0_WPTR, ring->wptr);
4047
4048         /* set the wb address whether it's enabled or not */
4049         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4050         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4051
4052         /* scratch register shadowing is no longer supported */
4053         WREG32(SCRATCH_UMSK, 0);
4054
4055         if (!rdev->wb.enabled)
4056                 tmp |= RB_NO_UPDATE;
4057
4058         mdelay(1);
4059         WREG32(CP_RB0_CNTL, tmp);
4060
4061         rb_addr = ring->gpu_addr >> 8;
4062         WREG32(CP_RB0_BASE, rb_addr);
4063         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4064
4065         /* start the ring */
4066         cik_cp_gfx_start(rdev);
4067         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4068         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4069         if (r) {
4070                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4071                 return r;
4072         }
4073
4074         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4075                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4076
4077         return 0;
4078 }
4079
4080 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4081                      struct radeon_ring *ring)
4082 {
4083         u32 rptr;
4084
4085         if (rdev->wb.enabled)
4086                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4087         else
4088                 rptr = RREG32(CP_RB0_RPTR);
4089
4090         return rptr;
4091 }
4092
4093 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4094                      struct radeon_ring *ring)
4095 {
4096         u32 wptr;
4097
4098         wptr = RREG32(CP_RB0_WPTR);
4099
4100         return wptr;
4101 }
4102
4103 void cik_gfx_set_wptr(struct radeon_device *rdev,
4104                       struct radeon_ring *ring)
4105 {
4106         WREG32(CP_RB0_WPTR, ring->wptr);
4107         (void)RREG32(CP_RB0_WPTR);
4108 }
4109
4110 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4111                          struct radeon_ring *ring)
4112 {
4113         u32 rptr;
4114
4115         if (rdev->wb.enabled) {
4116                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4117         } else {
4118                 mutex_lock(&rdev->srbm_mutex);
4119                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4120                 rptr = RREG32(CP_HQD_PQ_RPTR);
4121                 cik_srbm_select(rdev, 0, 0, 0, 0);
4122                 mutex_unlock(&rdev->srbm_mutex);
4123         }
4124
4125         return rptr;
4126 }
4127
4128 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4129                          struct radeon_ring *ring)
4130 {
4131         u32 wptr;
4132
4133         if (rdev->wb.enabled) {
4134                 /* XXX check if swapping is necessary on BE */
4135                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4136         } else {
4137                 mutex_lock(&rdev->srbm_mutex);
4138                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4139                 wptr = RREG32(CP_HQD_PQ_WPTR);
4140                 cik_srbm_select(rdev, 0, 0, 0, 0);
4141                 mutex_unlock(&rdev->srbm_mutex);
4142         }
4143
4144         return wptr;
4145 }
4146
4147 void cik_compute_set_wptr(struct radeon_device *rdev,
4148                           struct radeon_ring *ring)
4149 {
4150         /* XXX check if swapping is necessary on BE */
4151         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4152         WDOORBELL32(ring->doorbell_index, ring->wptr);
4153 }
4154
4155 /**
4156  * cik_cp_compute_enable - enable/disable the compute CP MEs
4157  *
4158  * @rdev: radeon_device pointer
4159  * @enable: enable or disable the MEs
4160  *
4161  * Halts or unhalts the compute MEs.
4162  */
4163 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4164 {
4165         if (enable)
4166                 WREG32(CP_MEC_CNTL, 0);
4167         else {
4168                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4169                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4170                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4171         }
4172         udelay(50);
4173 }
4174
4175 /**
4176  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4177  *
4178  * @rdev: radeon_device pointer
4179  *
4180  * Loads the compute MEC1&2 ucode.
4181  * Returns 0 for success, -EINVAL if the ucode is not available.
4182  */
4183 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4184 {
4185         const __be32 *fw_data;
4186         int i;
4187
4188         if (!rdev->mec_fw)
4189                 return -EINVAL;
4190
4191         cik_cp_compute_enable(rdev, false);
4192
4193         /* MEC1 */
4194         fw_data = (const __be32 *)rdev->mec_fw->data;
4195         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4196         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4197                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4198         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4199
4200         if (rdev->family == CHIP_KAVERI) {
4201                 /* MEC2 */
4202                 fw_data = (const __be32 *)rdev->mec_fw->data;
4203                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4204                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4205                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4206                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4207         }
4208
4209         return 0;
4210 }
4211
4212 /**
4213  * cik_cp_compute_start - start the compute queues
4214  *
4215  * @rdev: radeon_device pointer
4216  *
4217  * Enable the compute queues.
4218  * Returns 0 for success, error for failure.
4219  */
4220 static int cik_cp_compute_start(struct radeon_device *rdev)
4221 {
4222         cik_cp_compute_enable(rdev, true);
4223
4224         return 0;
4225 }
4226
4227 /**
4228  * cik_cp_compute_fini - stop the compute queues
4229  *
4230  * @rdev: radeon_device pointer
4231  *
4232  * Stop the compute queues and tear down the driver queue
4233  * info.
4234  */
4235 static void cik_cp_compute_fini(struct radeon_device *rdev)
4236 {
4237         int i, idx, r;
4238
4239         cik_cp_compute_enable(rdev, false);
4240
4241         for (i = 0; i < 2; i++) {
4242                 if (i == 0)
4243                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4244                 else
4245                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4246
4247                 if (rdev->ring[idx].mqd_obj) {
4248                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4249                         if (unlikely(r != 0))
4250                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4251
4252                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4253                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4254
4255                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4256                         rdev->ring[idx].mqd_obj = NULL;
4257                 }
4258         }
4259 }
4260
4261 static void cik_mec_fini(struct radeon_device *rdev)
4262 {
4263         int r;
4264
4265         if (rdev->mec.hpd_eop_obj) {
4266                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4267                 if (unlikely(r != 0))
4268                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4269                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4270                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4271
4272                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4273                 rdev->mec.hpd_eop_obj = NULL;
4274         }
4275 }
4276
4277 #define MEC_HPD_SIZE 2048
4278
4279 static int cik_mec_init(struct radeon_device *rdev)
4280 {
4281         int r;
4282         u32 *hpd;
4283
4284         /*
4285          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4286          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4287          */
4288         if (rdev->family == CHIP_KAVERI)
4289                 rdev->mec.num_mec = 2;
4290         else
4291                 rdev->mec.num_mec = 1;
4292         rdev->mec.num_pipe = 4;
4293         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
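        /*
         * Illustrative sizing: KAVERI gets 2 * 4 * 8 = 64 queues and the
         * HPD EOP buffer allocated below is 2 * 4 * MEC_HPD_SIZE * 2 =
         * 32 KiB; the single-MEC parts get 32 queues and a 16 KiB buffer.
         */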
4294
4295         if (rdev->mec.hpd_eop_obj == NULL) {
4296                 r = radeon_bo_create(rdev,
4297                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4298                                      PAGE_SIZE, true,
4299                                      RADEON_GEM_DOMAIN_GTT, NULL,
4300                                      &rdev->mec.hpd_eop_obj);
4301                 if (r) {
4302                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4303                         return r;
4304                 }
4305         }
4306
4307         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4308         if (unlikely(r != 0)) {
4309                 cik_mec_fini(rdev);
4310                 return r;
4311         }
4312         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4313                           &rdev->mec.hpd_eop_gpu_addr);
4314         if (r) {
4315                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4316                 cik_mec_fini(rdev);
4317                 return r;
4318         }
4319         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4320         if (r) {
4321                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4322                 cik_mec_fini(rdev);
4323                 return r;
4324         }
4325
4326         /* clear memory.  Not sure if this is required or not */
4327         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4328
4329         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4330         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4331
4332         return 0;
4333 }
4334
4335 struct hqd_registers
4336 {
4337         u32 cp_mqd_base_addr;
4338         u32 cp_mqd_base_addr_hi;
4339         u32 cp_hqd_active;
4340         u32 cp_hqd_vmid;
4341         u32 cp_hqd_persistent_state;
4342         u32 cp_hqd_pipe_priority;
4343         u32 cp_hqd_queue_priority;
4344         u32 cp_hqd_quantum;
4345         u32 cp_hqd_pq_base;
4346         u32 cp_hqd_pq_base_hi;
4347         u32 cp_hqd_pq_rptr;
4348         u32 cp_hqd_pq_rptr_report_addr;
4349         u32 cp_hqd_pq_rptr_report_addr_hi;
4350         u32 cp_hqd_pq_wptr_poll_addr;
4351         u32 cp_hqd_pq_wptr_poll_addr_hi;
4352         u32 cp_hqd_pq_doorbell_control;
4353         u32 cp_hqd_pq_wptr;
4354         u32 cp_hqd_pq_control;
4355         u32 cp_hqd_ib_base_addr;
4356         u32 cp_hqd_ib_base_addr_hi;
4357         u32 cp_hqd_ib_rptr;
4358         u32 cp_hqd_ib_control;
4359         u32 cp_hqd_iq_timer;
4360         u32 cp_hqd_iq_rptr;
4361         u32 cp_hqd_dequeue_request;
4362         u32 cp_hqd_dma_offload;
4363         u32 cp_hqd_sema_cmd;
4364         u32 cp_hqd_msg_type;
4365         u32 cp_hqd_atomic0_preop_lo;
4366         u32 cp_hqd_atomic0_preop_hi;
4367         u32 cp_hqd_atomic1_preop_lo;
4368         u32 cp_hqd_atomic1_preop_hi;
4369         u32 cp_hqd_hq_scheduler0;
4370         u32 cp_hqd_hq_scheduler1;
4371         u32 cp_mqd_control;
4372 };
4373
4374 struct bonaire_mqd
4375 {
4376         u32 header;
4377         u32 dispatch_initiator;
4378         u32 dimensions[3];
4379         u32 start_idx[3];
4380         u32 num_threads[3];
4381         u32 pipeline_stat_enable;
4382         u32 perf_counter_enable;
4383         u32 pgm[2];
4384         u32 tba[2];
4385         u32 tma[2];
4386         u32 pgm_rsrc[2];
4387         u32 vmid;
4388         u32 resource_limits;
4389         u32 static_thread_mgmt01[2];
4390         u32 tmp_ring_size;
4391         u32 static_thread_mgmt23[2];
4392         u32 restart[3];
4393         u32 thread_trace_enable;
4394         u32 reserved1;
4395         u32 user_data[16];
4396         u32 vgtcs_invoke_count[2];
4397         struct hqd_registers queue_state;
4398         u32 dequeue_cntr;
4399         u32 interrupt_queue[64];
4400 };
4401
4402 /**
4403  * cik_cp_compute_resume - setup the compute queue registers
4404  *
4405  * @rdev: radeon_device pointer
4406  *
4407  * Program the compute queues and test them to make sure they
4408  * are working.
4409  * Returns 0 for success, error for failure.
4410  */
4411 static int cik_cp_compute_resume(struct radeon_device *rdev)
4412 {
4413         int r, i, j, idx;
4414         u32 tmp;
4415         bool use_doorbell = true;
4416         u64 hqd_gpu_addr;
4417         u64 mqd_gpu_addr;
4418         u64 eop_gpu_addr;
4419         u64 wb_gpu_addr;
4420         u32 *buf;
4421         struct bonaire_mqd *mqd;
4422
4423         r = cik_cp_compute_start(rdev);
4424         if (r)
4425                 return r;
4426
4427         /* fix up chicken bits */
4428         tmp = RREG32(CP_CPF_DEBUG);
4429         tmp |= (1 << 23);
4430         WREG32(CP_CPF_DEBUG, tmp);
4431
4432         /* init the pipes */
4433         mutex_lock(&rdev->srbm_mutex);
4434         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4435                 int me = (i < 4) ? 1 : 2;
4436                 int pipe = (i < 4) ? i : (i - 4);
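                /*
                 * Illustrative mapping: i = 0..3 -> ME 1, pipes 0..3;
                 * i = 4..7 -> ME 2, pipes 0..3 (only reached on KAVERI,
                 * where num_mec == 2).
                 */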
4437
4438                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4439
4440                 cik_srbm_select(rdev, me, pipe, 0, 0);
4441
4442                 /* write the EOP addr */
4443                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4444                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4445
4446                 /* set the VMID assigned */
4447                 WREG32(CP_HPD_EOP_VMID, 0);
4448
4449                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4450                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4451                 tmp &= ~EOP_SIZE_MASK;
4452                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
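                /*
                 * e.g. MEC_HPD_SIZE = 2048: order_base_2(2048 / 8) = 8, so
                 * the hardware EOP ring is 2^(8+1) = 512 dwords = 2 KiB.
                 */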
4453                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4454         }
4455         cik_srbm_select(rdev, 0, 0, 0, 0);
4456         mutex_unlock(&rdev->srbm_mutex);
4457
4458         /* init the queues.  Just two for now. */
4459         for (i = 0; i < 2; i++) {
4460                 if (i == 0)
4461                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4462                 else
4463                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4464
4465                 if (rdev->ring[idx].mqd_obj == NULL) {
4466                         r = radeon_bo_create(rdev,
4467                                              sizeof(struct bonaire_mqd),
4468                                              PAGE_SIZE, true,
4469                                              RADEON_GEM_DOMAIN_GTT, NULL,
4470                                              &rdev->ring[idx].mqd_obj);
4471                         if (r) {
4472                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4473                                 return r;
4474                         }
4475                 }
4476
4477                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4478                 if (unlikely(r != 0)) {
4479                         cik_cp_compute_fini(rdev);
4480                         return r;
4481                 }
4482                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4483                                   &mqd_gpu_addr);
4484                 if (r) {
4485                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4486                         cik_cp_compute_fini(rdev);
4487                         return r;
4488                 }
4489                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4490                 if (r) {
4491                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4492                         cik_cp_compute_fini(rdev);
4493                         return r;
4494                 }
4495
4496                 /* init the mqd struct */
4497                 memset(buf, 0, sizeof(struct bonaire_mqd));
4498
4499                 mqd = (struct bonaire_mqd *)buf;
4500                 mqd->header = 0xC0310800;
4501                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4502                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4503                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4504                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4505
4506                 mutex_lock(&rdev->srbm_mutex);
4507                 cik_srbm_select(rdev, rdev->ring[idx].me,
4508                                 rdev->ring[idx].pipe,
4509                                 rdev->ring[idx].queue, 0);
4510
4511                 /* disable wptr polling */
4512                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4513                 tmp &= ~WPTR_POLL_EN;
4514                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4515
4516                 /* enable doorbell? */
4517                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4518                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4519                 if (use_doorbell)
4520                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4521                 else
4522                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4523                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4524                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4525
4526                 /* disable the queue if it's active */
4527                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4528                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4529                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4530                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4531                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4532                         for (j = 0; j < rdev->usec_timeout; j++) {
4533                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4534                                         break;
4535                                 udelay(1);
4536                         }
4537                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4538                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4539                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4540                 }
4541
4542                 /* set the pointer to the MQD */
4543                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4544                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4545                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4546                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4547                 /* set MQD vmid to 0 */
4548                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4549                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4550                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4551
4552                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4553                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4554                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4555                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4556                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4557                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4558
4559                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4560                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4561                 mqd->queue_state.cp_hqd_pq_control &=
4562                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4563
4564                 mqd->queue_state.cp_hqd_pq_control |=
4565                         order_base_2(rdev->ring[idx].ring_size / 8);
4566                 mqd->queue_state.cp_hqd_pq_control |=
4567                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4568 #ifdef __BIG_ENDIAN
4569                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4570 #endif
4571                 mqd->queue_state.cp_hqd_pq_control &=
4572                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4573                 mqd->queue_state.cp_hqd_pq_control |=
4574                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4575                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4576
4577                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4578                 if (i == 0)
4579                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4580                 else
4581                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4582                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4583                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4584                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4585                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4586                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4587
4588                 /* set the wb address whether it's enabled or not */
4589                 if (i == 0)
4590                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4591                 else
4592                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4593                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4594                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4595                         upper_32_bits(wb_gpu_addr) & 0xffff;
4596                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4597                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4598                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4599                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4600
4601                 /* enable the doorbell if requested */
4602                 if (use_doorbell) {
4603                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4604                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4605                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4606                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4607                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4608                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4609                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4610                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4611
4612                 } else {
4613                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4614                 }
4615                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4616                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4617
4618                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4619                 rdev->ring[idx].wptr = 0;
4620                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4621                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4622                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4623
4624                 /* set the vmid for the queue */
4625                 mqd->queue_state.cp_hqd_vmid = 0;
4626                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4627
4628                 /* activate the queue */
4629                 mqd->queue_state.cp_hqd_active = 1;
4630                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4631
4632                 cik_srbm_select(rdev, 0, 0, 0, 0);
4633                 mutex_unlock(&rdev->srbm_mutex);
4634
4635                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4636                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4637
4638                 rdev->ring[idx].ready = true;
4639                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4640                 if (r)
4641                         rdev->ring[idx].ready = false;
4642         }
4643
4644         return 0;
4645 }
4646
4647 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4648 {
4649         cik_cp_gfx_enable(rdev, enable);
4650         cik_cp_compute_enable(rdev, enable);
4651 }
4652
4653 static int cik_cp_load_microcode(struct radeon_device *rdev)
4654 {
4655         int r;
4656
4657         r = cik_cp_gfx_load_microcode(rdev);
4658         if (r)
4659                 return r;
4660         r = cik_cp_compute_load_microcode(rdev);
4661         if (r)
4662                 return r;
4663
4664         return 0;
4665 }
4666
4667 static void cik_cp_fini(struct radeon_device *rdev)
4668 {
4669         cik_cp_gfx_fini(rdev);
4670         cik_cp_compute_fini(rdev);
4671 }
4672
4673 static int cik_cp_resume(struct radeon_device *rdev)
4674 {
4675         int r;
4676
4677         cik_enable_gui_idle_interrupt(rdev, false);
4678
4679         r = cik_cp_load_microcode(rdev);
4680         if (r)
4681                 return r;
4682
4683         r = cik_cp_gfx_resume(rdev);
4684         if (r)
4685                 return r;
4686         r = cik_cp_compute_resume(rdev);
4687         if (r)
4688                 return r;
4689
4690         cik_enable_gui_idle_interrupt(rdev, true);
4691
4692         return 0;
4693 }
4694
4695 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4696 {
4697         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4698                 RREG32(GRBM_STATUS));
4699         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4700                 RREG32(GRBM_STATUS2));
4701         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4702                 RREG32(GRBM_STATUS_SE0));
4703         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4704                 RREG32(GRBM_STATUS_SE1));
4705         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4706                 RREG32(GRBM_STATUS_SE2));
4707         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4708                 RREG32(GRBM_STATUS_SE3));
4709         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4710                 RREG32(SRBM_STATUS));
4711         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4712                 RREG32(SRBM_STATUS2));
4713         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4714                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4715         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4716                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4717         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4718         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4719                  RREG32(CP_STALLED_STAT1));
4720         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4721                  RREG32(CP_STALLED_STAT2));
4722         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4723                  RREG32(CP_STALLED_STAT3));
4724         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4725                  RREG32(CP_CPF_BUSY_STAT));
4726         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4727                  RREG32(CP_CPF_STALLED_STAT1));
4728         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4729         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4730         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4731                  RREG32(CP_CPC_STALLED_STAT1));
4732         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4733 }
4734
4735 /**
4736  * cik_gpu_check_soft_reset - check which blocks are busy
4737  *
4738  * @rdev: radeon_device pointer
4739  *
4740  * Check which blocks are busy and return the relevant reset
4741  * mask to be used by cik_gpu_soft_reset().
4742  * Returns a mask of the blocks to be reset.
4743  */
4744 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4745 {
4746         u32 reset_mask = 0;
4747         u32 tmp;
4748
4749         /* GRBM_STATUS */
4750         tmp = RREG32(GRBM_STATUS);
4751         if (tmp & (PA_BUSY | SC_BUSY |
4752                    BCI_BUSY | SX_BUSY |
4753                    TA_BUSY | VGT_BUSY |
4754                    DB_BUSY | CB_BUSY |
4755                    GDS_BUSY | SPI_BUSY |
4756                    IA_BUSY | IA_BUSY_NO_DMA))
4757                 reset_mask |= RADEON_RESET_GFX;
4758
4759         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4760                 reset_mask |= RADEON_RESET_CP;
4761
4762         /* GRBM_STATUS2 */
4763         tmp = RREG32(GRBM_STATUS2);
4764         if (tmp & RLC_BUSY)
4765                 reset_mask |= RADEON_RESET_RLC;
4766
4767         /* SDMA0_STATUS_REG */
4768         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4769         if (!(tmp & SDMA_IDLE))
4770                 reset_mask |= RADEON_RESET_DMA;
4771
4772         /* SDMA1_STATUS_REG */
4773         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4774         if (!(tmp & SDMA_IDLE))
4775                 reset_mask |= RADEON_RESET_DMA1;
4776
4777         /* SRBM_STATUS2 */
4778         tmp = RREG32(SRBM_STATUS2);
4779         if (tmp & SDMA_BUSY)
4780                 reset_mask |= RADEON_RESET_DMA;
4781
4782         if (tmp & SDMA1_BUSY)
4783                 reset_mask |= RADEON_RESET_DMA1;
4784
4785         /* SRBM_STATUS */
4786         tmp = RREG32(SRBM_STATUS);
4787
4788         if (tmp & IH_BUSY)
4789                 reset_mask |= RADEON_RESET_IH;
4790
4791         if (tmp & SEM_BUSY)
4792                 reset_mask |= RADEON_RESET_SEM;
4793
4794         if (tmp & GRBM_RQ_PENDING)
4795                 reset_mask |= RADEON_RESET_GRBM;
4796
4797         if (tmp & VMC_BUSY)
4798                 reset_mask |= RADEON_RESET_VMC;
4799
4800         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4801                    MCC_BUSY | MCD_BUSY))
4802                 reset_mask |= RADEON_RESET_MC;
4803
4804         if (evergreen_is_display_hung(rdev))
4805                 reset_mask |= RADEON_RESET_DISPLAY;
4806
4807         /* Skip MC reset as it's most likely not hung, just busy */
4808         if (reset_mask & RADEON_RESET_MC) {
4809                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4810                 reset_mask &= ~RADEON_RESET_MC;
4811         }
4812
4813         return reset_mask;
4814 }
4815
4816 /**
4817  * cik_gpu_soft_reset - soft reset GPU
4818  *
4819  * @rdev: radeon_device pointer
4820  * @reset_mask: mask of which blocks to reset
4821  *
4822  * Soft reset the blocks specified in @reset_mask.
4823  */
4824 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4825 {
4826         struct evergreen_mc_save save;
4827         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4828         u32 tmp;
4829
4830         if (reset_mask == 0)
4831                 return;
4832
4833         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4834
4835         cik_print_gpu_status_regs(rdev);
4836         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4837                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4838         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4839                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4840
4841         /* disable CG/PG */
4842         cik_fini_pg(rdev);
4843         cik_fini_cg(rdev);
4844
4845         /* stop the rlc */
4846         cik_rlc_stop(rdev);
4847
4848         /* Disable GFX parsing/prefetching */
4849         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4850
4851         /* Disable MEC parsing/prefetching */
4852         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4853
4854         if (reset_mask & RADEON_RESET_DMA) {
4855                 /* sdma0 */
4856                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4857                 tmp |= SDMA_HALT;
4858                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4859         }
4860         if (reset_mask & RADEON_RESET_DMA1) {
4861                 /* sdma1 */
4862                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4863                 tmp |= SDMA_HALT;
4864                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4865         }
4866
4867         evergreen_mc_stop(rdev, &save);
4868         if (evergreen_mc_wait_for_idle(rdev)) {
4869                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4870         }
4871
4872         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4873                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4874
4875         if (reset_mask & RADEON_RESET_CP) {
4876                 grbm_soft_reset |= SOFT_RESET_CP;
4877
4878                 srbm_soft_reset |= SOFT_RESET_GRBM;
4879         }
4880
4881         if (reset_mask & RADEON_RESET_DMA)
4882                 srbm_soft_reset |= SOFT_RESET_SDMA;
4883
4884         if (reset_mask & RADEON_RESET_DMA1)
4885                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4886
4887         if (reset_mask & RADEON_RESET_DISPLAY)
4888                 srbm_soft_reset |= SOFT_RESET_DC;
4889
4890         if (reset_mask & RADEON_RESET_RLC)
4891                 grbm_soft_reset |= SOFT_RESET_RLC;
4892
4893         if (reset_mask & RADEON_RESET_SEM)
4894                 srbm_soft_reset |= SOFT_RESET_SEM;
4895
4896         if (reset_mask & RADEON_RESET_IH)
4897                 srbm_soft_reset |= SOFT_RESET_IH;
4898
4899         if (reset_mask & RADEON_RESET_GRBM)
4900                 srbm_soft_reset |= SOFT_RESET_GRBM;
4901
4902         if (reset_mask & RADEON_RESET_VMC)
4903                 srbm_soft_reset |= SOFT_RESET_VMC;
4904
4905         if (!(rdev->flags & RADEON_IS_IGP)) {
4906                 if (reset_mask & RADEON_RESET_MC)
4907                         srbm_soft_reset |= SOFT_RESET_MC;
4908         }
4909
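             /* pulse the reset bits: set them, read back to post the write, wait ~50us, then clear and read back again */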
4910         if (grbm_soft_reset) {
4911                 tmp = RREG32(GRBM_SOFT_RESET);
4912                 tmp |= grbm_soft_reset;
4913                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4914                 WREG32(GRBM_SOFT_RESET, tmp);
4915                 tmp = RREG32(GRBM_SOFT_RESET);
4916
4917                 udelay(50);
4918
4919                 tmp &= ~grbm_soft_reset;
4920                 WREG32(GRBM_SOFT_RESET, tmp);
4921                 tmp = RREG32(GRBM_SOFT_RESET);
4922         }
4923
4924         if (srbm_soft_reset) {
4925                 tmp = RREG32(SRBM_SOFT_RESET);
4926                 tmp |= srbm_soft_reset;
4927                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4928                 WREG32(SRBM_SOFT_RESET, tmp);
4929                 tmp = RREG32(SRBM_SOFT_RESET);
4930
4931                 udelay(50);
4932
4933                 tmp &= ~srbm_soft_reset;
4934                 WREG32(SRBM_SOFT_RESET, tmp);
4935                 tmp = RREG32(SRBM_SOFT_RESET);
4936         }
4937
4938         /* Wait a little for things to settle down */
4939         udelay(50);
4940
4941         evergreen_mc_resume(rdev, &save);
4942         udelay(50);
4943
4944         cik_print_gpu_status_regs(rdev);
4945 }
4946
4947 struct kv_reset_save_regs {
4948         u32 gmcon_reng_execute;
4949         u32 gmcon_misc;
4950         u32 gmcon_misc3;
4951 };
4952
4953 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4954                                    struct kv_reset_save_regs *save)
4955 {
4956         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4957         save->gmcon_misc = RREG32(GMCON_MISC);
4958         save->gmcon_misc3 = RREG32(GMCON_MISC3);
4959
4960         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4961         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4962                                                 STCTRL_STUTTER_EN));
4963 }
4964
4965 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4966                                       struct kv_reset_save_regs *save)
4967 {
4968         int i;
4969
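             /* replay the GMCON power-gating FSM programming; the config/write value pairs below are a fixed, hardware-specific restore sequence */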
4970         WREG32(GMCON_PGFSM_WRITE, 0);
4971         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4972
4973         for (i = 0; i < 5; i++)
4974                 WREG32(GMCON_PGFSM_WRITE, 0);
4975
4976         WREG32(GMCON_PGFSM_WRITE, 0);
4977         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4978
4979         for (i = 0; i < 5; i++)
4980                 WREG32(GMCON_PGFSM_WRITE, 0);
4981
4982         WREG32(GMCON_PGFSM_WRITE, 0x210000);
4983         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4984
4985         for (i = 0; i < 5; i++)
4986                 WREG32(GMCON_PGFSM_WRITE, 0);
4987
4988         WREG32(GMCON_PGFSM_WRITE, 0x21003);
4989         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4990
4991         for (i = 0; i < 5; i++)
4992                 WREG32(GMCON_PGFSM_WRITE, 0);
4993
4994         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4995         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4996
4997         for (i = 0; i < 5; i++)
4998                 WREG32(GMCON_PGFSM_WRITE, 0);
4999
5000         WREG32(GMCON_PGFSM_WRITE, 0);
5001         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5002
5003         for (i = 0; i < 5; i++)
5004                 WREG32(GMCON_PGFSM_WRITE, 0);
5005
5006         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5007         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5008
5009         for (i = 0; i < 5; i++)
5010                 WREG32(GMCON_PGFSM_WRITE, 0);
5011
5012         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5013         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5014
5015         for (i = 0; i < 5; i++)
5016                 WREG32(GMCON_PGFSM_WRITE, 0);
5017
5018         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5019         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5020
5021         for (i = 0; i < 5; i++)
5022                 WREG32(GMCON_PGFSM_WRITE, 0);
5023
5024         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5025         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5026
5027         for (i = 0; i < 5; i++)
5028                 WREG32(GMCON_PGFSM_WRITE, 0);
5029
5030         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5031         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5032
5033         WREG32(GMCON_MISC3, save->gmcon_misc3);
5034         WREG32(GMCON_MISC, save->gmcon_misc);
5035         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5036 }
5037
5038 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5039 {
5040         struct evergreen_mc_save save;
5041         struct kv_reset_save_regs kv_save = { 0 };
5042         u32 tmp, i;
5043
5044         dev_info(rdev->dev, "GPU pci config reset\n");
5045
5046         /* disable dpm? */
5047
5048         /* disable cg/pg */
5049         cik_fini_pg(rdev);
5050         cik_fini_cg(rdev);
5051
5052         /* Disable GFX parsing/prefetching */
5053         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5054
5055         /* Disable MEC parsing/prefetching */
5056         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5057
5058         /* sdma0 */
5059         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5060         tmp |= SDMA_HALT;
5061         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5062         /* sdma1 */
5063         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5064         tmp |= SDMA_HALT;
5065         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5066         /* XXX other engines? */
5067
5068         /* halt the rlc, disable cp internal ints */
5069         cik_rlc_stop(rdev);
5070
5071         udelay(50);
5072
5073         /* disable mem access */
5074         evergreen_mc_stop(rdev, &save);
5075         if (evergreen_mc_wait_for_idle(rdev)) {
5076                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5077         }
5078
5079         if (rdev->flags & RADEON_IS_IGP)
5080                 kv_save_regs_for_reset(rdev, &kv_save);
5081
5082         /* disable BM */
5083         pci_clear_master(rdev->pdev);
5084         /* reset */
5085         radeon_pci_config_reset(rdev);
5086
5087         udelay(100);
5088
5089         /* wait for asic to come out of reset */
5090         for (i = 0; i < rdev->usec_timeout; i++) {
5091                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5092                         break;
5093                 udelay(1);
5094         }
5095
5096         /* does asic init need to be run first??? */
5097         if (rdev->flags & RADEON_IS_IGP)
5098                 kv_restore_regs_for_reset(rdev, &kv_save);
5099 }
5100
5101 /**
5102  * cik_asic_reset - soft reset GPU
5103  *
5104  * @rdev: radeon_device pointer
5105  *
5106  * Look up which blocks are hung and attempt
5107  * to reset them.
5108  * Returns 0 for success.
5109  */
5110 int cik_asic_reset(struct radeon_device *rdev)
5111 {
5112         u32 reset_mask;
5113
5114         reset_mask = cik_gpu_check_soft_reset(rdev);
5115
5116         if (reset_mask)
5117                 r600_set_bios_scratch_engine_hung(rdev, true);
5118
5119         /* try soft reset */
5120         cik_gpu_soft_reset(rdev, reset_mask);
5121
5122         reset_mask = cik_gpu_check_soft_reset(rdev);
5123
5124         /* try pci config reset */
5125         if (reset_mask && radeon_hard_reset)
5126                 cik_gpu_pci_config_reset(rdev);
5127
5128         reset_mask = cik_gpu_check_soft_reset(rdev);
5129
5130         if (!reset_mask)
5131                 r600_set_bios_scratch_engine_hung(rdev, false);
5132
5133         return 0;
5134 }
5135
5136 /**
5137  * cik_gfx_is_lockup - check if the 3D engine is locked up
5138  *
5139  * @rdev: radeon_device pointer
5140  * @ring: radeon_ring structure holding ring information
5141  *
5142  * Check if the 3D engine is locked up (CIK).
5143  * Returns true if the engine is locked, false if not.
5144  */
5145 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5146 {
5147         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5148
5149         if (!(reset_mask & (RADEON_RESET_GFX |
5150                             RADEON_RESET_COMPUTE |
5151                             RADEON_RESET_CP))) {
5152                 radeon_ring_lockup_update(rdev, ring);
5153                 return false;
5154         }
5155         return radeon_ring_test_lockup(rdev, ring);
5156 }
5157
5158 /* MC */
5159 /**
5160  * cik_mc_program - program the GPU memory controller
5161  *
5162  * @rdev: radeon_device pointer
5163  *
5164  * Set the location of vram, gart, and AGP in the GPU's
5165  * physical address space (CIK).
5166  */
5167 static void cik_mc_program(struct radeon_device *rdev)
5168 {
5169         struct evergreen_mc_save save;
5170         u32 tmp;
5171         int i, j;
5172
5173         /* Initialize HDP */
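             /* each iteration clears one of 32 HDP register blocks (5 registers at a 0x18-byte stride) */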
5174         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5175                 WREG32((0x2c14 + j), 0x00000000);
5176                 WREG32((0x2c18 + j), 0x00000000);
5177                 WREG32((0x2c1c + j), 0x00000000);
5178                 WREG32((0x2c20 + j), 0x00000000);
5179                 WREG32((0x2c24 + j), 0x00000000);
5180         }
5181         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5182
5183         evergreen_mc_stop(rdev, &save);
5184         if (radeon_mc_wait_for_idle(rdev)) {
5185                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5186         }
5187         /* Lockout access through VGA aperture*/
5188         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5189         /* Update configuration */
5190         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5191                rdev->mc.vram_start >> 12);
5192         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5193                rdev->mc.vram_end >> 12);
5194         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5195                rdev->vram_scratch.gpu_addr >> 12);
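             /* MC_VM_FB_LOCATION packs the top (bits 31:16) and base (bits 15:0) of VRAM in 16MB units */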
5196         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5197         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5198         WREG32(MC_VM_FB_LOCATION, tmp);
5199         /* XXX double check these! */
5200         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5201         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5202         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5203         WREG32(MC_VM_AGP_BASE, 0);
5204         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5205         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5206         if (radeon_mc_wait_for_idle(rdev)) {
5207                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5208         }
5209         evergreen_mc_resume(rdev, &save);
5210         /* we need to own VRAM, so turn off the VGA renderer here
5211          * to stop it overwriting our objects */
5212         rv515_vga_render_disable(rdev);
5213 }
5214
5215 /**
5216  * cik_mc_init - initialize the memory controller driver params
5217  *
5218  * @rdev: radeon_device pointer
5219  *
5220  * Look up the amount of vram, vram width, and decide how to place
5221  * vram and gart within the GPU's physical address space (CIK).
5222  * Returns 0 for success.
5223  */
5224 static int cik_mc_init(struct radeon_device *rdev)
5225 {
5226         u32 tmp;
5227         int chansize, numchan;
5228
5229         /* Get VRAM information */
5230         rdev->mc.vram_is_ddr = true;
5231         tmp = RREG32(MC_ARB_RAMCFG);
5232         if (tmp & CHANSIZE_MASK) {
5233                 chansize = 64;
5234         } else {
5235                 chansize = 32;
5236         }
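             /* NOOFCHAN encodes the number of memory channels; vram_width = channels * channel size */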
5237         tmp = RREG32(MC_SHARED_CHMAP);
5238         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5239         case 0:
5240         default:
5241                 numchan = 1;
5242                 break;
5243         case 1:
5244                 numchan = 2;
5245                 break;
5246         case 2:
5247                 numchan = 4;
5248                 break;
5249         case 3:
5250                 numchan = 8;
5251                 break;
5252         case 4:
5253                 numchan = 3;
5254                 break;
5255         case 5:
5256                 numchan = 6;
5257                 break;
5258         case 6:
5259                 numchan = 10;
5260                 break;
5261         case 7:
5262                 numchan = 12;
5263                 break;
5264         case 8:
5265                 numchan = 16;
5266                 break;
5267         }
5268         rdev->mc.vram_width = numchan * chansize;
5269         /* Could the aperture size report 0? */
5270         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5271         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5272         /* CONFIG_MEMSIZE reports the VRAM size in MB */
5273         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5274         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5275         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5276         si_vram_gtt_location(rdev, &rdev->mc);
5277         radeon_update_bandwidth_info(rdev);
5278
5279         return 0;
5280 }
5281
5282 /*
5283  * GART
5284  * VMID 0 is the physical GPU addresses as used by the kernel.
5285  * VMIDs 1-15 are used for userspace clients and are handled
5286  * by the radeon vm/hsa code.
5287  */
5288 /**
5289  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5290  *
5291  * @rdev: radeon_device pointer
5292  *
5293  * Flush the TLB for the VMID 0 page table (CIK).
5294  */
5295 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5296 {
5297         /* flush hdp cache */
5298         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5299
5300         /* bits 0-15 are the VM contexts 0-15 */
5301         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5302 }
5303
5304 /**
5305  * cik_pcie_gart_enable - gart enable
5306  *
5307  * @rdev: radeon_device pointer
5308  *
5309  * This sets up the TLBs, programs the page tables for VMID0,
5310  * sets up the hw for VMIDs 1-15 which are allocated on
5311  * demand, and sets up the global locations for the LDS, GDS,
5312  * and GPUVM for FSA64 clients (CIK).
5313  * Returns 0 for success, errors for failure.
5314  */
5315 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5316 {
5317         int r, i;
5318
5319         if (rdev->gart.robj == NULL) {
5320                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5321                 return -EINVAL;
5322         }
5323         r = radeon_gart_table_vram_pin(rdev);
5324         if (r)
5325                 return r;
5326         radeon_gart_restore(rdev);
5327         /* Setup TLB control */
5328         WREG32(MC_VM_MX_L1_TLB_CNTL,
5329                (0xA << 7) |
5330                ENABLE_L1_TLB |
5331                ENABLE_L1_FRAGMENT_PROCESSING |
5332                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5333                ENABLE_ADVANCED_DRIVER_MODEL |
5334                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5335         /* Setup L2 cache */
5336         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5337                ENABLE_L2_FRAGMENT_PROCESSING |
5338                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5339                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5340                EFFECTIVE_L2_QUEUE_SIZE(7) |
5341                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5342         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5343         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5344                BANK_SELECT(4) |
5345                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5346         /* setup context0 */
5347         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5348         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5349         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5350         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5351                         (u32)(rdev->dummy_page.addr >> 12));
5352         WREG32(VM_CONTEXT0_CNTL2, 0);
5353         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5354                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5355
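             /* clear three registers referenced only by raw offset here (presumably additional VM setup) */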
5356         WREG32(0x15D4, 0);
5357         WREG32(0x15D8, 0);
5358         WREG32(0x15DC, 0);
5359
5360         /* empty context1-15 */
5361         /* FIXME: start with 4GB; once a two-level page table is in use,
5362          * switch to the full VM address space
5363          */
5364         /* set vm size, must be a multiple of 4 */
5365         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5366         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5367         for (i = 1; i < 16; i++) {
5368                 if (i < 8)
5369                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5370                                rdev->gart.table_addr >> 12);
5371                 else
5372                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5373                                rdev->gart.table_addr >> 12);
5374         }
5375
5376         /* enable context1-15 */
5377         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5378                (u32)(rdev->dummy_page.addr >> 12));
5379         WREG32(VM_CONTEXT1_CNTL2, 4);
5380         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5381                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5382                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5383                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5384                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5385                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5386                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5387                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5388                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5389                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5390                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5391                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5392                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5393
5394         if (rdev->family == CHIP_KAVERI) {
5395                 u32 tmp = RREG32(CHUB_CONTROL);
5396                 tmp &= ~BYPASS_VM;
5397                 WREG32(CHUB_CONTROL, tmp);
5398         }
5399
5400         /* XXX SH_MEM regs */
5401         /* where to put LDS, scratch, GPUVM in FSA64 space */
5402         mutex_lock(&rdev->srbm_mutex);
5403         for (i = 0; i < 16; i++) {
5404                 cik_srbm_select(rdev, 0, 0, 0, i);
5405                 /* CP and shaders */
5406                 WREG32(SH_MEM_CONFIG, 0);
5407                 WREG32(SH_MEM_APE1_BASE, 1);
5408                 WREG32(SH_MEM_APE1_LIMIT, 0);
5409                 WREG32(SH_MEM_BASES, 0);
5410                 /* SDMA GFX */
5411                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5412                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5413                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5414                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5415                 /* XXX SDMA RLC - todo */
5416         }
5417         cik_srbm_select(rdev, 0, 0, 0, 0);
5418         mutex_unlock(&rdev->srbm_mutex);
5419
5420         cik_pcie_gart_tlb_flush(rdev);
5421         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5422                  (unsigned)(rdev->mc.gtt_size >> 20),
5423                  (unsigned long long)rdev->gart.table_addr);
5424         rdev->gart.ready = true;
5425         return 0;
5426 }
5427
5428 /**
5429  * cik_pcie_gart_disable - gart disable
5430  *
5431  * @rdev: radeon_device pointer
5432  *
5433  * This disables all VM page tables (CIK).
5434  */
5435 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5436 {
5437         /* Disable all tables */
5438         WREG32(VM_CONTEXT0_CNTL, 0);
5439         WREG32(VM_CONTEXT1_CNTL, 0);
5440         /* Setup TLB control */
5441         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5442                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5443         /* Setup L2 cache */
5444         WREG32(VM_L2_CNTL,
5445                ENABLE_L2_FRAGMENT_PROCESSING |
5446                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5447                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5448                EFFECTIVE_L2_QUEUE_SIZE(7) |
5449                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5450         WREG32(VM_L2_CNTL2, 0);
5451         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5452                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5453         radeon_gart_table_vram_unpin(rdev);
5454 }
5455
5456 /**
5457  * cik_pcie_gart_fini - vm fini callback
5458  *
5459  * @rdev: radeon_device pointer
5460  *
5461  * Tears down the driver GART/VM setup (CIK).
5462  */
5463 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5464 {
5465         cik_pcie_gart_disable(rdev);
5466         radeon_gart_table_vram_free(rdev);
5467         radeon_gart_fini(rdev);
5468 }
5469
5470 /* vm parser */
5471 /**
5472  * cik_ib_parse - vm ib_parse callback
5473  *
5474  * @rdev: radeon_device pointer
5475  * @ib: indirect buffer pointer
5476  *
5477  * CIK uses hw IB checking so this is a nop (CIK).
5478  */
5479 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5480 {
5481         return 0;
5482 }
5483
5484 /*
5485  * vm
5486  * VMID 0 is the physical GPU addresses as used by the kernel.
5487  * VMIDs 1-15 are used for userspace clients and are handled
5488  * by the radeon vm/hsa code.
5489  */
5490 /**
5491  * cik_vm_init - cik vm init callback
5492  *
5493  * @rdev: radeon_device pointer
5494  *
5495  * Inits cik specific vm parameters (number of VMs, base of vram for
5496  * VMIDs 1-15) (CIK).
5497  * Returns 0 for success.
5498  */
5499 int cik_vm_init(struct radeon_device *rdev)
5500 {
5501         /* number of VMs */
5502         rdev->vm_manager.nvm = 16;
5503         /* base offset of vram pages */
5504         if (rdev->flags & RADEON_IS_IGP) {
5505                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5506                 tmp <<= 22;
5507                 rdev->vm_manager.vram_base_offset = tmp;
5508         } else
5509                 rdev->vm_manager.vram_base_offset = 0;
5510
5511         return 0;
5512 }
5513
5514 /**
5515  * cik_vm_fini - cik vm fini callback
5516  *
5517  * @rdev: radeon_device pointer
5518  *
5519  * Tear down any asic specific VM setup (CIK).
5520  */
5521 void cik_vm_fini(struct radeon_device *rdev)
5522 {
5523 }
5524
5525 /**
5526  * cik_vm_decode_fault - print human readable fault info
5527  *
5528  * @rdev: radeon_device pointer
5529  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5530  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
      * @mc_client: memory client ID of the client that caused the fault
5531  *
5532  * Print human-readable fault information (CIK).
5533  */
5534 static void cik_vm_decode_fault(struct radeon_device *rdev,
5535                                 u32 status, u32 addr, u32 mc_client)
5536 {
5537         u32 mc_id;
5538         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5539         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5540         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5541                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5542
5543         if (rdev->family == CHIP_HAWAII)
5544                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5545         else
5546                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5547
5548         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5549                protections, vmid, addr,
5550                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5551                block, mc_client, mc_id);
5552 }
5553
5554 /**
5555  * cik_vm_flush - cik vm flush using the CP
5556  *
5557  * @rdev: radeon_device pointer
      * @ridx: index of the ring used to emit the flush
      * @vm: VM to flush
5558  *
5559  * Update the page table base and flush the VM TLB
5560  * using the CP (CIK).
5561  */
5562 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5563 {
5564         struct radeon_ring *ring = &rdev->ring[ridx];
5565
5566         if (vm == NULL)
5567                 return;
5568
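             /* each WRITE_DATA packet below: header, engine/dst select, register dword offset, upper address (0 for register writes), then the data dword(s) */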
5569         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5570         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5571                                  WRITE_DATA_DST_SEL(0)));
5572         if (vm->id < 8) {
5573                 radeon_ring_write(ring,
5574                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5575         } else {
5576                 radeon_ring_write(ring,
5577                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5578         }
5579         radeon_ring_write(ring, 0);
5580         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5581
5582         /* update SH_MEM_* regs */
5583         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5584         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5585                                  WRITE_DATA_DST_SEL(0)));
5586         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5587         radeon_ring_write(ring, 0);
5588         radeon_ring_write(ring, VMID(vm->id));
5589
5590         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5591         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5592                                  WRITE_DATA_DST_SEL(0)));
5593         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5594         radeon_ring_write(ring, 0);
5595
5596         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5597         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5598         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5599         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5600
5601         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5602         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5603                                  WRITE_DATA_DST_SEL(0)));
5604         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5605         radeon_ring_write(ring, 0);
5606         radeon_ring_write(ring, VMID(0));
5607
5608         /* HDP flush */
5609         cik_hdp_flush_cp_ring_emit(rdev, ridx);
5610
5611         /* bits 0-15 are the VM contexts 0-15 */
5612         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5613         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5614                                  WRITE_DATA_DST_SEL(0)));
5615         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5616         radeon_ring_write(ring, 0);
5617         radeon_ring_write(ring, 1 << vm->id);
5618
5619         /* compute doesn't have PFP */
5620         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5621                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5622                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5623                 radeon_ring_write(ring, 0x0);
5624         }
5625 }
5626
5627 /*
5628  * RLC
5629  * The RLC is a multi-purpose microengine that handles a
5630  * variety of functions, the most important of which is
5631  * the interrupt controller.
5632  */
5633 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5634                                           bool enable)
5635 {
5636         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5637
5638         if (enable)
5639                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5640         else
5641                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5642         WREG32(CP_INT_CNTL_RING0, tmp);
5643 }
5644
5645 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5646 {
5647         u32 tmp;
5648
5649         tmp = RREG32(RLC_LB_CNTL);
5650         if (enable)
5651                 tmp |= LOAD_BALANCE_ENABLE;
5652         else
5653                 tmp &= ~LOAD_BALANCE_ENABLE;
5654         WREG32(RLC_LB_CNTL, tmp);
5655 }
5656
5657 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5658 {
5659         u32 i, j, k;
5660         u32 mask;
5661
5662         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5663                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5664                         cik_select_se_sh(rdev, i, j);
5665                         for (k = 0; k < rdev->usec_timeout; k++) {
5666                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5667                                         break;
5668                                 udelay(1);
5669                         }
5670                 }
5671         }
5672         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5673
5674         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5675         for (k = 0; k < rdev->usec_timeout; k++) {
5676                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5677                         break;
5678                 udelay(1);
5679         }
5680 }
5681
5682 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5683 {
5684         u32 tmp;
5685
5686         tmp = RREG32(RLC_CNTL);
5687         if (tmp != rlc)
5688                 WREG32(RLC_CNTL, rlc);
5689 }
5690
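     /* Halt the RLC if it is running, wait for it to go idle, and return the
      * previous RLC_CNTL value so the caller can restore it via cik_update_rlc().
      */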
5691 static u32 cik_halt_rlc(struct radeon_device *rdev)
5692 {
5693         u32 data, orig;
5694
5695         orig = data = RREG32(RLC_CNTL);
5696
5697         if (data & RLC_ENABLE) {
5698                 u32 i;
5699
5700                 data &= ~RLC_ENABLE;
5701                 WREG32(RLC_CNTL, data);
5702
5703                 for (i = 0; i < rdev->usec_timeout; i++) {
5704                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5705                                 break;
5706                         udelay(1);
5707                 }
5708
5709                 cik_wait_for_rlc_serdes(rdev);
5710         }
5711
5712         return orig;
5713 }
5714
5715 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5716 {
5717         u32 tmp, i, mask;
5718
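             /* request safe mode, then wait for the GFX power/clock status bits and for the RLC to acknowledge by clearing REQ */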
5719         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5720         WREG32(RLC_GPR_REG2, tmp);
5721
5722         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5723         for (i = 0; i < rdev->usec_timeout; i++) {
5724                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5725                         break;
5726                 udelay(1);
5727         }
5728
5729         for (i = 0; i < rdev->usec_timeout; i++) {
5730                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5731                         break;
5732                 udelay(1);
5733         }
5734 }
5735
5736 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5737 {
5738         u32 tmp;
5739
5740         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5741         WREG32(RLC_GPR_REG2, tmp);
5742 }
5743
5744 /**
5745  * cik_rlc_stop - stop the RLC ME
5746  *
5747  * @rdev: radeon_device pointer
5748  *
5749  * Halt the RLC ME (MicroEngine) (CIK).
5750  */
5751 static void cik_rlc_stop(struct radeon_device *rdev)
5752 {
5753         WREG32(RLC_CNTL, 0);
5754
5755         cik_enable_gui_idle_interrupt(rdev, false);
5756
5757         cik_wait_for_rlc_serdes(rdev);
5758 }
5759
5760 /**
5761  * cik_rlc_start - start the RLC ME
5762  *
5763  * @rdev: radeon_device pointer
5764  *
5765  * Unhalt the RLC ME (MicroEngine) (CIK).
5766  */
5767 static void cik_rlc_start(struct radeon_device *rdev)
5768 {
5769         WREG32(RLC_CNTL, RLC_ENABLE);
5770
5771         cik_enable_gui_idle_interrupt(rdev, true);
5772
5773         udelay(50);
5774 }
5775
5776 /**
5777  * cik_rlc_resume - setup the RLC hw
5778  *
5779  * @rdev: radeon_device pointer
5780  *
5781  * Initialize the RLC registers, load the ucode,
5782  * and start the RLC (CIK).
5783  * Returns 0 for success, -EINVAL if the ucode is not available.
5784  */
5785 static int cik_rlc_resume(struct radeon_device *rdev)
5786 {
5787         u32 i, size, tmp;
5788         const __be32 *fw_data;
5789
5790         if (!rdev->rlc_fw)
5791                 return -EINVAL;
5792
5793         switch (rdev->family) {
5794         case CHIP_BONAIRE:
5795         case CHIP_HAWAII:
5796         default:
5797                 size = BONAIRE_RLC_UCODE_SIZE;
5798                 break;
5799         case CHIP_KAVERI:
5800                 size = KV_RLC_UCODE_SIZE;
5801                 break;
5802         case CHIP_KABINI:
5803                 size = KB_RLC_UCODE_SIZE;
5804                 break;
5805         }
5806
5807         cik_rlc_stop(rdev);
5808
5809         /* disable CG */
5810         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5811         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5812
5813         si_rlc_reset(rdev);
5814
5815         cik_init_pg(rdev);
5816
5817         cik_init_cg(rdev);
5818
5819         WREG32(RLC_LB_CNTR_INIT, 0);
5820         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5821
5822         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5823         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5824         WREG32(RLC_LB_PARAMS, 0x00600408);
5825         WREG32(RLC_LB_CNTL, 0x80000004);
5826
5827         WREG32(RLC_MC_CNTL, 0);
5828         WREG32(RLC_UCODE_CNTL, 0);
5829
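             /* the RLC ucode words are big-endian; convert them as they are streamed into the GPM ucode data port */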
5830         fw_data = (const __be32 *)rdev->rlc_fw->data;
5831         WREG32(RLC_GPM_UCODE_ADDR, 0);
5832         for (i = 0; i < size; i++)
5833                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5834         WREG32(RLC_GPM_UCODE_ADDR, 0);
5835
5836         /* XXX - find out what chips support lbpw */
5837         cik_enable_lbpw(rdev, false);
5838
5839         if (rdev->family == CHIP_BONAIRE)
5840                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5841
5842         cik_rlc_start(rdev);
5843
5844         return 0;
5845 }
5846
5847 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5848 {
5849         u32 data, orig, tmp, tmp2;
5850
5851         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5852
5853         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5854                 cik_enable_gui_idle_interrupt(rdev, true);
5855
5856                 tmp = cik_halt_rlc(rdev);
5857
5858                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5859                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5860                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5861                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5862                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5863
5864                 cik_update_rlc(rdev, tmp);
5865
5866                 data |= CGCG_EN | CGLS_EN;
5867         } else {
5868                 cik_enable_gui_idle_interrupt(rdev, false);
5869
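                     /* dummy reads, presumably to let the clock-gating state settle before clearing the enable bits */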
5870                 RREG32(CB_CGTT_SCLK_CTRL);
5871                 RREG32(CB_CGTT_SCLK_CTRL);
5872                 RREG32(CB_CGTT_SCLK_CTRL);
5873                 RREG32(CB_CGTT_SCLK_CTRL);
5874
5875                 data &= ~(CGCG_EN | CGLS_EN);
5876         }
5877
5878         if (orig != data)
5879                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5880
5881 }
5882
5883 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5884 {
5885         u32 data, orig, tmp = 0;
5886
5887         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5888                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5889                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5890                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
5891                                 data |= CP_MEM_LS_EN;
5892                                 if (orig != data)
5893                                         WREG32(CP_MEM_SLP_CNTL, data);
5894                         }
5895                 }
5896
5897                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5898                 data &= 0xfffffffd;
5899                 if (orig != data)
5900                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5901
5902                 tmp = cik_halt_rlc(rdev);
5903
5904                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5905                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5906                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5907                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5908                 WREG32(RLC_SERDES_WR_CTRL, data);
5909
5910                 cik_update_rlc(rdev, tmp);
5911
5912                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5913                         orig = data = RREG32(CGTS_SM_CTRL_REG);
5914                         data &= ~SM_MODE_MASK;
5915                         data |= SM_MODE(0x2);
5916                         data |= SM_MODE_ENABLE;
5917                         data &= ~CGTS_OVERRIDE;
5918                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5919                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5920                                 data &= ~CGTS_LS_OVERRIDE;
5921                         data &= ~ON_MONITOR_ADD_MASK;
5922                         data |= ON_MONITOR_ADD_EN;
5923                         data |= ON_MONITOR_ADD(0x96);
5924                         if (orig != data)
5925                                 WREG32(CGTS_SM_CTRL_REG, data);
5926                 }
5927         } else {
5928                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5929                 data |= 0x00000002;
5930                 if (orig != data)
5931                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5932
5933                 data = RREG32(RLC_MEM_SLP_CNTL);
5934                 if (data & RLC_MEM_LS_EN) {
5935                         data &= ~RLC_MEM_LS_EN;
5936                         WREG32(RLC_MEM_SLP_CNTL, data);
5937                 }
5938
5939                 data = RREG32(CP_MEM_SLP_CNTL);
5940                 if (data & CP_MEM_LS_EN) {
5941                         data &= ~CP_MEM_LS_EN;
5942                         WREG32(CP_MEM_SLP_CNTL, data);
5943                 }
5944
5945                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5946                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5947                 if (orig != data)
5948                         WREG32(CGTS_SM_CTRL_REG, data);
5949
5950                 tmp = cik_halt_rlc(rdev);
5951
5952                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5953                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5954                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5955                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5956                 WREG32(RLC_SERDES_WR_CTRL, data);
5957
5958                 cik_update_rlc(rdev, tmp);
5959         }
5960 }
5961
5962 static const u32 mc_cg_registers[] =
5963 {
5964         MC_HUB_MISC_HUB_CG,
5965         MC_HUB_MISC_SIP_CG,
5966         MC_HUB_MISC_VM_CG,
5967         MC_XPB_CLK_GAT,
5968         ATC_MISC_CG,
5969         MC_CITF_MISC_WR_CG,
5970         MC_CITF_MISC_RD_CG,
5971         MC_CITF_MISC_VM_CG,
5972         VM_L2_CG,
5973 };
5974
5975 static void cik_enable_mc_ls(struct radeon_device *rdev,
5976                              bool enable)
5977 {
5978         int i;
5979         u32 orig, data;
5980
5981         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5982                 orig = data = RREG32(mc_cg_registers[i]);
5983                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5984                         data |= MC_LS_ENABLE;
5985                 else
5986                         data &= ~MC_LS_ENABLE;
5987                 if (data != orig)
5988                         WREG32(mc_cg_registers[i], data);
5989         }
5990 }
5991
5992 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5993                                bool enable)
5994 {
5995         int i;
5996         u32 orig, data;
5997
5998         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5999                 orig = data = RREG32(mc_cg_registers[i]);
6000                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6001                         data |= MC_CG_ENABLE;
6002                 else
6003                         data &= ~MC_CG_ENABLE;
6004                 if (data != orig)
6005                         WREG32(mc_cg_registers[i], data);
6006         }
6007 }
6008
6009 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6010                                  bool enable)
6011 {
6012         u32 orig, data;
6013
6014         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6015                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6016                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6017         } else {
6018                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6019                 data |= 0xff000000;
6020                 if (data != orig)
6021                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6022
6023                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6024                 data |= 0xff000000;
6025                 if (data != orig)
6026                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6027         }
6028 }
6029
6030 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6031                                  bool enable)
6032 {
6033         u32 orig, data;
6034
6035         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6036                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6037                 data |= 0x100;
6038                 if (orig != data)
6039                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6040
6041                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6042                 data |= 0x100;
6043                 if (orig != data)
6044                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6045         } else {
6046                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6047                 data &= ~0x100;
6048                 if (orig != data)
6049                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6050
6051                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6052                 data &= ~0x100;
6053                 if (orig != data)
6054                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6055         }
6056 }
6057
6058 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6059                                 bool enable)
6060 {
6061         u32 orig, data;
6062
6063         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6064                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6065                 data = 0xfff;
6066                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6067
6068                 orig = data = RREG32(UVD_CGC_CTRL);
6069                 data |= DCM;
6070                 if (orig != data)
6071                         WREG32(UVD_CGC_CTRL, data);
6072         } else {
6073                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6074                 data &= ~0xfff;
6075                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6076
6077                 orig = data = RREG32(UVD_CGC_CTRL);
6078                 data &= ~DCM;
6079                 if (orig != data)
6080                         WREG32(UVD_CGC_CTRL, data);
6081         }
6082 }
6083
6084 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6085                                bool enable)
6086 {
6087         u32 orig, data;
6088
6089         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6090
6091         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6092                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6093                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6094         else
6095                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6096                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6097
6098         if (orig != data)
6099                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6100 }
6101
6102 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6103                                 bool enable)
6104 {
6105         u32 orig, data;
6106
6107         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6108
6109         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6110                 data &= ~CLOCK_GATING_DIS;
6111         else
6112                 data |= CLOCK_GATING_DIS;
6113
6114         if (orig != data)
6115                 WREG32(HDP_HOST_PATH_CNTL, data);
6116 }
6117
6118 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6119                               bool enable)
6120 {
6121         u32 orig, data;
6122
6123         orig = data = RREG32(HDP_MEM_POWER_LS);
6124
6125         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6126                 data |= HDP_LS_ENABLE;
6127         else
6128                 data &= ~HDP_LS_ENABLE;
6129
6130         if (orig != data)
6131                 WREG32(HDP_MEM_POWER_LS, data);
6132 }
6133
6134 void cik_update_cg(struct radeon_device *rdev,
6135                    u32 block, bool enable)
6136 {
6137
6138         if (block & RADEON_CG_BLOCK_GFX) {
6139                 cik_enable_gui_idle_interrupt(rdev, false);
6140                 /* order matters: enable MGCG before CGCG, disable in the reverse order */
6141                 if (enable) {
6142                         cik_enable_mgcg(rdev, true);
6143                         cik_enable_cgcg(rdev, true);
6144                 } else {
6145                         cik_enable_cgcg(rdev, false);
6146                         cik_enable_mgcg(rdev, false);
6147                 }
6148                 cik_enable_gui_idle_interrupt(rdev, true);
6149         }
6150
6151         if (block & RADEON_CG_BLOCK_MC) {
6152                 if (!(rdev->flags & RADEON_IS_IGP)) {
6153                         cik_enable_mc_mgcg(rdev, enable);
6154                         cik_enable_mc_ls(rdev, enable);
6155                 }
6156         }
6157
6158         if (block & RADEON_CG_BLOCK_SDMA) {
6159                 cik_enable_sdma_mgcg(rdev, enable);
6160                 cik_enable_sdma_mgls(rdev, enable);
6161         }
6162
6163         if (block & RADEON_CG_BLOCK_BIF) {
6164                 cik_enable_bif_mgls(rdev, enable);
6165         }
6166
6167         if (block & RADEON_CG_BLOCK_UVD) {
6168                 if (rdev->has_uvd)
6169                         cik_enable_uvd_mgcg(rdev, enable);
6170         }
6171
6172         if (block & RADEON_CG_BLOCK_HDP) {
6173                 cik_enable_hdp_mgcg(rdev, enable);
6174                 cik_enable_hdp_ls(rdev, enable);
6175         }
6176
6177         if (block & RADEON_CG_BLOCK_VCE) {
6178                 vce_v2_0_enable_mgcg(rdev, enable);
6179         }
6180 }
6181
6182 static void cik_init_cg(struct radeon_device *rdev)
6183 {
6184
6185         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6186
6187         if (rdev->has_uvd)
6188                 si_init_uvd_internal_cg(rdev);
6189
6190         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6191                              RADEON_CG_BLOCK_SDMA |
6192                              RADEON_CG_BLOCK_BIF |
6193                              RADEON_CG_BLOCK_UVD |
6194                              RADEON_CG_BLOCK_HDP), true);
6195 }
6196
6197 static void cik_fini_cg(struct radeon_device *rdev)
6198 {
6199         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6200                              RADEON_CG_BLOCK_SDMA |
6201                              RADEON_CG_BLOCK_BIF |
6202                              RADEON_CG_BLOCK_UVD |
6203                              RADEON_CG_BLOCK_HDP), false);
6204
6205         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6206 }
6207
6208 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6209                                           bool enable)
6210 {
6211         u32 data, orig;
6212
6213         orig = data = RREG32(RLC_PG_CNTL);
6214         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6215                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6216         else
6217                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6218         if (orig != data)
6219                 WREG32(RLC_PG_CNTL, data);
6220 }
6221
6222 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6223                                           bool enable)
6224 {
6225         u32 data, orig;
6226
6227         orig = data = RREG32(RLC_PG_CNTL);
6228         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6229                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6230         else
6231                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6232         if (orig != data)
6233                 WREG32(RLC_PG_CNTL, data);
6234 }
6235
6236 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6237 {
6238         u32 data, orig;
6239
6240         orig = data = RREG32(RLC_PG_CNTL);
6241         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6242                 data &= ~DISABLE_CP_PG;
6243         else
6244                 data |= DISABLE_CP_PG;
6245         if (orig != data)
6246                 WREG32(RLC_PG_CNTL, data);
6247 }
6248
6249 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6250 {
6251         u32 data, orig;
6252
6253         orig = data = RREG32(RLC_PG_CNTL);
6254         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6255                 data &= ~DISABLE_GDS_PG;
6256         else
6257                 data |= DISABLE_GDS_PG;
6258         if (orig != data)
6259                 WREG32(RLC_PG_CNTL, data);
6260 }
6261
6262 #define CP_ME_TABLE_SIZE    96
6263 #define CP_ME_TABLE_OFFSET  2048
6264 #define CP_MEC_TABLE_OFFSET 4096
6265
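     /* Build the CP power-gating table: copy CP_ME_TABLE_SIZE dwords from a fixed
      * offset inside each of the CE, PFP, ME and MEC firmware images into the
      * RLC's cp_table buffer.
      */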
6266 void cik_init_cp_pg_table(struct radeon_device *rdev)
6267 {
6268         const __be32 *fw_data;
6269         volatile u32 *dst_ptr;
6270         int me, i, max_me = 4;
6271         u32 bo_offset = 0;
6272         u32 table_offset;
6273
6274         if (rdev->family == CHIP_KAVERI)
6275                 max_me = 5;
6276
6277         if (rdev->rlc.cp_table_ptr == NULL)
6278                 return;
6279
6280         /* write the cp table buffer */
6281         dst_ptr = rdev->rlc.cp_table_ptr;
6282         for (me = 0; me < max_me; me++) {
6283                 if (me == 0) {
6284                         fw_data = (const __be32 *)rdev->ce_fw->data;
6285                         table_offset = CP_ME_TABLE_OFFSET;
6286                 } else if (me == 1) {
6287                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6288                         table_offset = CP_ME_TABLE_OFFSET;
6289                 } else if (me == 2) {
6290                         fw_data = (const __be32 *)rdev->me_fw->data;
6291                         table_offset = CP_ME_TABLE_OFFSET;
6292                 } else {
6293                         fw_data = (const __be32 *)rdev->mec_fw->data;
6294                         table_offset = CP_MEC_TABLE_OFFSET;
6295                 }
6296
6297                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6298                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6299                 }
6300                 bo_offset += CP_ME_TABLE_SIZE;
6301         }
6302 }
6303
6304 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6305                                 bool enable)
6306 {
6307         u32 data, orig;
6308
6309         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6310                 orig = data = RREG32(RLC_PG_CNTL);
6311                 data |= GFX_PG_ENABLE;
6312                 if (orig != data)
6313                         WREG32(RLC_PG_CNTL, data);
6314
6315                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6316                 data |= AUTO_PG_EN;
6317                 if (orig != data)
6318                         WREG32(RLC_AUTO_PG_CTRL, data);
6319         } else {
6320                 orig = data = RREG32(RLC_PG_CNTL);
6321                 data &= ~GFX_PG_ENABLE;
6322                 if (orig != data)
6323                         WREG32(RLC_PG_CNTL, data);
6324
6325                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6326                 data &= ~AUTO_PG_EN;
6327                 if (orig != data)
6328                         WREG32(RLC_AUTO_PG_CTRL, data);
6329
6330                 data = RREG32(DB_RENDER_CONTROL);
6331         }
6332 }
6333
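/* Return a bitmap of the active (non-disabled) CUs for the given SE/SH.
 * The disabled-CU bits live in the upper 16 bits of the SHADER_ARRAY_CONFIG
 * registers, so the combined value is shifted down, inverted and masked to
 * max_cu_per_sh.
 */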
6334 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6335 {
6336         u32 mask = 0, tmp, tmp1;
6337         int i;
6338
6339         cik_select_se_sh(rdev, se, sh);
6340         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6341         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6342         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6343
6344         tmp &= 0xffff0000;
6345
6346         tmp |= tmp1;
6347         tmp >>= 16;
6348
6349         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6350                 mask <<= 1;
6351                 mask |= 1;
6352         }
6353
6354         return (~tmp) & mask;
6355 }
6356
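/* Program the RLC "always on" CU mask: the first two active CUs in each
 * SH are kept powered (RLC_PG_AO_CU_MASK) and RLC_MAX_PG_CU is programmed
 * with the total number of active CUs.
 */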
6357 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6358 {
6359         u32 i, j, k, active_cu_number = 0;
6360         u32 mask, counter, cu_bitmap;
6361         u32 tmp = 0;
6362
6363         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6364                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6365                         mask = 1;
6366                         cu_bitmap = 0;
6367                         counter = 0;
6368                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6369                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6370                                         if (counter < 2)
6371                                                 cu_bitmap |= mask;
6372                                         counter++;
6373                                 }
6374                                 mask <<= 1;
6375                         }
6376
6377                         active_cu_number += counter;
6378                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6379                 }
6380         }
6381
6382         WREG32(RLC_PG_AO_CU_MASK, tmp);
6383
6384         tmp = RREG32(RLC_MAX_PG_CU);
6385         tmp &= ~MAX_PU_CU_MASK;
6386         tmp |= MAX_PU_CU(active_cu_number);
6387         WREG32(RLC_MAX_PG_CU, tmp);
6388 }
6389
6390 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6391                                        bool enable)
6392 {
6393         u32 data, orig;
6394
6395         orig = data = RREG32(RLC_PG_CNTL);
6396         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6397                 data |= STATIC_PER_CU_PG_ENABLE;
6398         else
6399                 data &= ~STATIC_PER_CU_PG_ENABLE;
6400         if (orig != data)
6401                 WREG32(RLC_PG_CNTL, data);
6402 }
6403
6404 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6405                                         bool enable)
6406 {
6407         u32 data, orig;
6408
6409         orig = data = RREG32(RLC_PG_CNTL);
6410         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6411                 data |= DYN_PER_CU_PG_ENABLE;
6412         else
6413                 data &= ~DYN_PER_CU_PG_ENABLE;
6414         if (orig != data)
6415                 WREG32(RLC_PG_CNTL, data);
6416 }
6417
6418 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6419 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6420
6421 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6422 {
6423         u32 data, orig;
6424         u32 i;
6425
6426         if (rdev->rlc.cs_data) {
6427                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6428                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6429                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6430                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6431         } else {
6432                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6433                 for (i = 0; i < 3; i++)
6434                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6435         }
6436         if (rdev->rlc.reg_list) {
6437                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6438                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6439                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6440         }
6441
6442         orig = data = RREG32(RLC_PG_CNTL);
6443         data |= GFX_PG_SRC;
6444         if (orig != data)
6445                 WREG32(RLC_PG_CNTL, data);
6446
6447         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6448         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6449
6450         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6451         data &= ~IDLE_POLL_COUNT_MASK;
6452         data |= IDLE_POLL_COUNT(0x60);
6453         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6454
6455         data = 0x10101010;
6456         WREG32(RLC_PG_DELAY, data);
6457
6458         data = RREG32(RLC_PG_DELAY_2);
6459         data &= ~0xff;
6460         data |= 0x3;
6461         WREG32(RLC_PG_DELAY_2, data);
6462
6463         data = RREG32(RLC_AUTO_PG_CTRL);
6464         data &= ~GRBM_REG_SGIT_MASK;
6465         data |= GRBM_REG_SGIT(0x700);
6466         WREG32(RLC_AUTO_PG_CTRL, data);
6467
6468 }
6469
6470 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6471 {
6472         cik_enable_gfx_cgpg(rdev, enable);
6473         cik_enable_gfx_static_mgpg(rdev, enable);
6474         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6475 }
6476
6477 u32 cik_get_csb_size(struct radeon_device *rdev)
6478 {
6479         u32 count = 0;
6480         const struct cs_section_def *sect = NULL;
6481         const struct cs_extent_def *ext = NULL;
6482
6483         if (rdev->rlc.cs_data == NULL)
6484                 return 0;
6485
6486         /* begin clear state */
6487         count += 2;
6488         /* context control state */
6489         count += 3;
6490
6491         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6492                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6493                         if (sect->id == SECT_CONTEXT)
6494                                 count += 2 + ext->reg_count;
6495                         else
6496                                 return 0;
6497                 }
6498         }
6499         /* pa_sc_raster_config/pa_sc_raster_config1 */
6500         count += 4;
6501         /* end clear state */
6502         count += 2;
6503         /* clear state */
6504         count += 2;
6505
6506         return count;
6507 }
6508
6509 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6510 {
6511         u32 count = 0, i;
6512         const struct cs_section_def *sect = NULL;
6513         const struct cs_extent_def *ext = NULL;
6514
6515         if (rdev->rlc.cs_data == NULL)
6516                 return;
6517         if (buffer == NULL)
6518                 return;
6519
6520         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6521         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6522
6523         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6524         buffer[count++] = cpu_to_le32(0x80000000);
6525         buffer[count++] = cpu_to_le32(0x80000000);
6526
6527         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6528                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6529                         if (sect->id == SECT_CONTEXT) {
6530                                 buffer[count++] =
6531                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6532                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6533                                 for (i = 0; i < ext->reg_count; i++)
6534                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6535                         } else {
6536                                 return;
6537                         }
6538                 }
6539         }
6540
6541         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6542         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6543         switch (rdev->family) {
6544         case CHIP_BONAIRE:
6545                 buffer[count++] = cpu_to_le32(0x16000012);
6546                 buffer[count++] = cpu_to_le32(0x00000000);
6547                 break;
6548         case CHIP_KAVERI:
6549                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6550                 buffer[count++] = cpu_to_le32(0x00000000);
6551                 break;
6552         case CHIP_KABINI:
6553                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6554                 buffer[count++] = cpu_to_le32(0x00000000);
6555                 break;
6556         case CHIP_HAWAII:
6557                 buffer[count++] = cpu_to_le32(0x3a00161a);
6558                 buffer[count++] = cpu_to_le32(0x0000002e);
6559                 break;
6560         default:
6561                 buffer[count++] = cpu_to_le32(0x00000000);
6562                 buffer[count++] = cpu_to_le32(0x00000000);
6563                 break;
6564         }
6565
6566         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6567         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6568
6569         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6570         buffer[count++] = cpu_to_le32(0);
6571 }
6572
6573 static void cik_init_pg(struct radeon_device *rdev)
6574 {
6575         if (rdev->pg_flags) {
6576                 cik_enable_sck_slowdown_on_pu(rdev, true);
6577                 cik_enable_sck_slowdown_on_pd(rdev, true);
6578                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6579                         cik_init_gfx_cgpg(rdev);
6580                         cik_enable_cp_pg(rdev, true);
6581                         cik_enable_gds_pg(rdev, true);
6582                 }
6583                 cik_init_ao_cu_mask(rdev);
6584                 cik_update_gfx_pg(rdev, true);
6585         }
6586 }
6587
6588 static void cik_fini_pg(struct radeon_device *rdev)
6589 {
6590         if (rdev->pg_flags) {
6591                 cik_update_gfx_pg(rdev, false);
6592                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6593                         cik_enable_cp_pg(rdev, false);
6594                         cik_enable_gds_pg(rdev, false);
6595                 }
6596         }
6597 }
6598
6599 /*
6600  * Interrupts
6601  * Starting with r6xx, interrupts are handled via a ring buffer.
6602  * Ring buffers are areas of GPU accessible memory that the GPU
6603  * writes interrupt vectors into and the host reads vectors out of.
6604  * There is a rptr (read pointer) that determines where the
6605  * host is currently reading, and a wptr (write pointer)
6606  * which determines where the GPU has written.  When the
6607  * pointers are equal, the ring is idle.  When the GPU
6608  * writes vectors to the ring buffer, it increments the
6609  * wptr.  When there is an interrupt, the host then starts
6610  * fetching vectors and processing them until the pointers are
6611  * equal again at which point it updates the rptr.
6612  */
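
/*
 * Illustrative sketch only, not used by the driver: the host-side consumer
 * loop for the rptr/wptr scheme described above.  The helper name and its
 * parameters are hypothetical; the real consumer is cik_irq_process() below,
 * which additionally acks the sources and dispatches on them.
 */
static inline u32 cik_ih_example_drain(const volatile u32 *ring,
                                       u32 ptr_mask, u32 rptr, u32 wptr)
{
        /* rptr/wptr are byte offsets into the ring; each vector is 16 bytes */
        while (rptr != wptr) {
                u32 src_id = le32_to_cpu(ring[rptr / 4]) & 0xff;

                (void)src_id;   /* a real handler dispatches on src_id here */
                rptr = (rptr + 16) & ptr_mask;
        }
        /* the caller writes rptr back (IH_RB_RPTR) so the ring reads as idle */
        return rptr;
}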
6613
6614 /**
6615  * cik_enable_interrupts - Enable the interrupt ring buffer
6616  *
6617  * @rdev: radeon_device pointer
6618  *
6619  * Enable the interrupt ring buffer (CIK).
6620  */
6621 static void cik_enable_interrupts(struct radeon_device *rdev)
6622 {
6623         u32 ih_cntl = RREG32(IH_CNTL);
6624         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6625
6626         ih_cntl |= ENABLE_INTR;
6627         ih_rb_cntl |= IH_RB_ENABLE;
6628         WREG32(IH_CNTL, ih_cntl);
6629         WREG32(IH_RB_CNTL, ih_rb_cntl);
6630         rdev->ih.enabled = true;
6631 }
6632
6633 /**
6634  * cik_disable_interrupts - Disable the interrupt ring buffer
6635  *
6636  * @rdev: radeon_device pointer
6637  *
6638  * Disable the interrupt ring buffer (CIK).
6639  */
6640 static void cik_disable_interrupts(struct radeon_device *rdev)
6641 {
6642         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6643         u32 ih_cntl = RREG32(IH_CNTL);
6644
6645         ih_rb_cntl &= ~IH_RB_ENABLE;
6646         ih_cntl &= ~ENABLE_INTR;
6647         WREG32(IH_RB_CNTL, ih_rb_cntl);
6648         WREG32(IH_CNTL, ih_cntl);
6649         /* set rptr, wptr to 0 */
6650         WREG32(IH_RB_RPTR, 0);
6651         WREG32(IH_RB_WPTR, 0);
6652         rdev->ih.enabled = false;
6653         rdev->ih.rptr = 0;
6654 }
6655
6656 /**
6657  * cik_disable_interrupt_state - Disable all interrupt sources
6658  *
6659  * @rdev: radeon_device pointer
6660  *
6661  * Clear all interrupt enable bits used by the driver (CIK).
6662  */
6663 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6664 {
6665         u32 tmp;
6666
6667         /* gfx ring */
6668         tmp = RREG32(CP_INT_CNTL_RING0) &
6669                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6670         WREG32(CP_INT_CNTL_RING0, tmp);
6671         /* sdma */
6672         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6673         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6674         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6675         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6676         /* compute queues */
6677         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6678         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6679         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6680         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6681         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6682         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6683         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6684         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6685         /* grbm */
6686         WREG32(GRBM_INT_CNTL, 0);
6687         /* vline/vblank, etc. */
6688         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6689         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6690         if (rdev->num_crtc >= 4) {
6691                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6692                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6693         }
6694         if (rdev->num_crtc >= 6) {
6695                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6696                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6697         }
6698
6699         /* dac hotplug */
6700         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6701
6702         /* digital hotplug */
6703         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6704         WREG32(DC_HPD1_INT_CONTROL, tmp);
6705         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6706         WREG32(DC_HPD2_INT_CONTROL, tmp);
6707         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6708         WREG32(DC_HPD3_INT_CONTROL, tmp);
6709         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6710         WREG32(DC_HPD4_INT_CONTROL, tmp);
6711         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6712         WREG32(DC_HPD5_INT_CONTROL, tmp);
6713         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6714         WREG32(DC_HPD6_INT_CONTROL, tmp);
6715
6716 }
6717
6718 /**
6719  * cik_irq_init - init and enable the interrupt ring
6720  *
6721  * @rdev: radeon_device pointer
6722  *
6723  * Allocate a ring buffer for the interrupt controller,
6724  * enable the RLC, disable interrupts, enable the IH
6725  * ring buffer and enable it (CIK).
6726  * Called at device load and resume.
6727  * Returns 0 for success, errors for failure.
6728  */
6729 static int cik_irq_init(struct radeon_device *rdev)
6730 {
6731         int ret = 0;
6732         int rb_bufsz;
6733         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6734
6735         /* allocate ring */
6736         ret = r600_ih_ring_alloc(rdev);
6737         if (ret)
6738                 return ret;
6739
6740         /* disable irqs */
6741         cik_disable_interrupts(rdev);
6742
6743         /* init rlc */
6744         ret = cik_rlc_resume(rdev);
6745         if (ret) {
6746                 r600_ih_ring_fini(rdev);
6747                 return ret;
6748         }
6749
6750         /* setup interrupt control */
6751         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6752         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6753         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6754         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6755          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6756          */
6757         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6758         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6759         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6760         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6761
6762         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6763         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6764
6765         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6766                       IH_WPTR_OVERFLOW_CLEAR |
6767                       (rb_bufsz << 1));
6768
6769         if (rdev->wb.enabled)
6770                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6771
6772         /* set the writeback address whether it's enabled or not */
6773         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6774         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6775
6776         WREG32(IH_RB_CNTL, ih_rb_cntl);
6777
6778         /* set rptr, wptr to 0 */
6779         WREG32(IH_RB_RPTR, 0);
6780         WREG32(IH_RB_WPTR, 0);
6781
6782         /* Default settings for IH_CNTL (disabled at first) */
6783         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6784         /* RPTR_REARM only works if msi's are enabled */
6785         if (rdev->msi_enabled)
6786                 ih_cntl |= RPTR_REARM;
6787         WREG32(IH_CNTL, ih_cntl);
6788
6789         /* force the active interrupt state to all disabled */
6790         cik_disable_interrupt_state(rdev);
6791
6792         pci_set_master(rdev->pdev);
6793
6794         /* enable irqs */
6795         cik_enable_interrupts(rdev);
6796
6797         return ret;
6798 }
6799
6800 /**
6801  * cik_irq_set - enable/disable interrupt sources
6802  *
6803  * @rdev: radeon_device pointer
6804  *
6805  * Enable interrupt sources on the GPU (vblanks, hpd,
6806  * etc.) (CIK).
6807  * Returns 0 for success, errors for failure.
6808  */
6809 int cik_irq_set(struct radeon_device *rdev)
6810 {
6811         u32 cp_int_cntl;
6812         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6813         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6814         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6815         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6816         u32 grbm_int_cntl = 0;
6817         u32 dma_cntl, dma_cntl1;
6818         u32 thermal_int;
6819
6820         if (!rdev->irq.installed) {
6821                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6822                 return -EINVAL;
6823         }
6824         /* don't enable anything if the ih is disabled */
6825         if (!rdev->ih.enabled) {
6826                 cik_disable_interrupts(rdev);
6827                 /* force the active interrupt state to all disabled */
6828                 cik_disable_interrupt_state(rdev);
6829                 return 0;
6830         }
6831
6832         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6833                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6834         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6835
6836         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6837         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6838         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6839         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6840         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6841         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6842
6843         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6844         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6845
6846         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6847         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6848         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6849         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6850         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6851         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6852         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6853         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6854
6855         if (rdev->flags & RADEON_IS_IGP)
6856                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6857                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6858         else
6859                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6860                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6861
6862         /* enable CP interrupts on all rings */
6863         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6864                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6865                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6866         }
6867         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6868                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6869                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
6870                 if (ring->me == 1) {
6871                         switch (ring->pipe) {
6872                         case 0:
6873                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6874                                 break;
6875                         case 1:
6876                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6877                                 break;
6878                         case 2:
6879                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6880                                 break;
6881                         case 3:
6882                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6883                                 break;
6884                         default:
6885                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6886                                 break;
6887                         }
6888                 } else if (ring->me == 2) {
6889                         switch (ring->pipe) {
6890                         case 0:
6891                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6892                                 break;
6893                         case 1:
6894                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6895                                 break;
6896                         case 2:
6897                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6898                                 break;
6899                         case 3:
6900                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6901                                 break;
6902                         default:
6903                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6904                                 break;
6905                         }
6906                 } else {
6907                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6908                 }
6909         }
6910         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6911                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6912                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6913                 if (ring->me == 1) {
6914                         switch (ring->pipe) {
6915                         case 0:
6916                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6917                                 break;
6918                         case 1:
6919                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6920                                 break;
6921                         case 2:
6922                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6923                                 break;
6924                         case 3:
6925                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6926                                 break;
6927                         default:
6928                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6929                                 break;
6930                         }
6931                 } else if (ring->me == 2) {
6932                         switch (ring->pipe) {
6933                         case 0:
6934                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6935                                 break;
6936                         case 1:
6937                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6938                                 break;
6939                         case 2:
6940                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6941                                 break;
6942                         case 3:
6943                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6944                                 break;
6945                         default:
6946                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6947                                 break;
6948                         }
6949                 } else {
6950                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6951                 }
6952         }
6953
6954         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6955                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6956                 dma_cntl |= TRAP_ENABLE;
6957         }
6958
6959         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6960                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6961                 dma_cntl1 |= TRAP_ENABLE;
6962         }
6963
6964         if (rdev->irq.crtc_vblank_int[0] ||
6965             atomic_read(&rdev->irq.pflip[0])) {
6966                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6967                 crtc1 |= VBLANK_INTERRUPT_MASK;
6968         }
6969         if (rdev->irq.crtc_vblank_int[1] ||
6970             atomic_read(&rdev->irq.pflip[1])) {
6971                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6972                 crtc2 |= VBLANK_INTERRUPT_MASK;
6973         }
6974         if (rdev->irq.crtc_vblank_int[2] ||
6975             atomic_read(&rdev->irq.pflip[2])) {
6976                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6977                 crtc3 |= VBLANK_INTERRUPT_MASK;
6978         }
6979         if (rdev->irq.crtc_vblank_int[3] ||
6980             atomic_read(&rdev->irq.pflip[3])) {
6981                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6982                 crtc4 |= VBLANK_INTERRUPT_MASK;
6983         }
6984         if (rdev->irq.crtc_vblank_int[4] ||
6985             atomic_read(&rdev->irq.pflip[4])) {
6986                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6987                 crtc5 |= VBLANK_INTERRUPT_MASK;
6988         }
6989         if (rdev->irq.crtc_vblank_int[5] ||
6990             atomic_read(&rdev->irq.pflip[5])) {
6991                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6992                 crtc6 |= VBLANK_INTERRUPT_MASK;
6993         }
6994         if (rdev->irq.hpd[0]) {
6995                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6996                 hpd1 |= DC_HPDx_INT_EN;
6997         }
6998         if (rdev->irq.hpd[1]) {
6999                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7000                 hpd2 |= DC_HPDx_INT_EN;
7001         }
7002         if (rdev->irq.hpd[2]) {
7003                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7004                 hpd3 |= DC_HPDx_INT_EN;
7005         }
7006         if (rdev->irq.hpd[3]) {
7007                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7008                 hpd4 |= DC_HPDx_INT_EN;
7009         }
7010         if (rdev->irq.hpd[4]) {
7011                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7012                 hpd5 |= DC_HPDx_INT_EN;
7013         }
7014         if (rdev->irq.hpd[5]) {
7015                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7016                 hpd6 |= DC_HPDx_INT_EN;
7017         }
7018
7019         if (rdev->irq.dpm_thermal) {
7020                 DRM_DEBUG("dpm thermal\n");
7021                 if (rdev->flags & RADEON_IS_IGP)
7022                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7023                 else
7024                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7025         }
7026
7027         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7028
7029         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7030         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7031
7032         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7033         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7034         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7035         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7036         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7037         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7038         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7039         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7040
7041         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7042
7043         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7044         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7045         if (rdev->num_crtc >= 4) {
7046                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7047                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7048         }
7049         if (rdev->num_crtc >= 6) {
7050                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7051                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7052         }
7053
7054         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7055         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7056         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7057         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7058         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7059         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7060
7061         if (rdev->flags & RADEON_IS_IGP)
7062                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7063         else
7064                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7065
7066         return 0;
7067 }
7068
7069 /**
7070  * cik_irq_ack - ack interrupt sources
7071  *
7072  * @rdev: radeon_device pointer
7073  *
7074  * Ack interrupt sources on the GPU (vblanks, hpd,
7075  * etc.) (CIK).  Certain interrupt sources are sw
7076  * generated and do not require an explicit ack.
7077  */
7078 static inline void cik_irq_ack(struct radeon_device *rdev)
7079 {
7080         u32 tmp;
7081
7082         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7083         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7084         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7085         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7086         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7087         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7088         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7089
7090         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7091                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7092         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7093                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7094         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7095                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7096         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7097                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7098
7099         if (rdev->num_crtc >= 4) {
7100                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7101                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7102                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7103                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7104                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7105                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7106                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7107                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7108         }
7109
7110         if (rdev->num_crtc >= 6) {
7111                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7112                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7113                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7114                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7115                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7116                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7117                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7118                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7119         }
7120
7121         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7122                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7123                 tmp |= DC_HPDx_INT_ACK;
7124                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7125         }
7126         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7127                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7128                 tmp |= DC_HPDx_INT_ACK;
7129                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7130         }
7131         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7132                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7133                 tmp |= DC_HPDx_INT_ACK;
7134                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7135         }
7136         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7137                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7138                 tmp |= DC_HPDx_INT_ACK;
7139                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7140         }
7141         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7142                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7143                 tmp |= DC_HPDx_INT_ACK;
7144                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7145         }
7146         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7147                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7148                 tmp |= DC_HPDx_INT_ACK;
7149                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7150         }
7151 }
7152
7153 /**
7154  * cik_irq_disable - disable interrupts
7155  *
7156  * @rdev: radeon_device pointer
7157  *
7158  * Disable interrupts on the hw (CIK).
7159  */
7160 static void cik_irq_disable(struct radeon_device *rdev)
7161 {
7162         cik_disable_interrupts(rdev);
7163         /* Wait and acknowledge irq */
7164         mdelay(1);
7165         cik_irq_ack(rdev);
7166         cik_disable_interrupt_state(rdev);
7167 }
7168
7169 /**
7170  * cik_irq_suspend - disable interrupts for suspend
7171  *
7172  * @rdev: radeon_device pointer
7173  *
7174  * Disable interrupts and stop the RLC (CIK).
7175  * Used for suspend.
7176  */
7177 static void cik_irq_suspend(struct radeon_device *rdev)
7178 {
7179         cik_irq_disable(rdev);
7180         cik_rlc_stop(rdev);
7181 }
7182
7183 /**
7184  * cik_irq_fini - tear down interrupt support
7185  *
7186  * @rdev: radeon_device pointer
7187  *
7188  * Disable interrupts on the hw and free the IH ring
7189  * buffer (CIK).
7190  * Used for driver unload.
7191  */
7192 static void cik_irq_fini(struct radeon_device *rdev)
7193 {
7194         cik_irq_suspend(rdev);
7195         r600_ih_ring_fini(rdev);
7196 }
7197
7198 /**
7199  * cik_get_ih_wptr - get the IH ring buffer wptr
7200  *
7201  * @rdev: radeon_device pointer
7202  *
7203  * Get the IH ring buffer wptr from either the register
7204  * or the writeback memory buffer (CIK).  Also check for
7205  * ring buffer overflow and deal with it.
7206  * Used by cik_irq_process().
7207  * Returns the value of the wptr.
7208  */
7209 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7210 {
7211         u32 wptr, tmp;
7212
7213         if (rdev->wb.enabled)
7214                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7215         else
7216                 wptr = RREG32(IH_RB_WPTR);
7217
7218         if (wptr & RB_OVERFLOW) {
7219                 /* When a ring buffer overflow happens, start parsing interrupts
7220                  * from the last vector that was not overwritten (wptr + 16).
7221                  * Hopefully this allows us to catch up.
7222                  */
7223                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7224                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7225                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7226                 tmp = RREG32(IH_RB_CNTL);
7227                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7228                 WREG32(IH_RB_CNTL, tmp);
7229         }
7230         return (wptr & rdev->ih.ptr_mask);
7231 }
7232
7233 /*        CIK IV Ring
7234  * Each IV ring entry is 128 bits:
7235  * [7:0]    - interrupt source id
7236  * [31:8]   - reserved
7237  * [59:32]  - interrupt source data
7238  * [63:60]  - reserved
7239  * [71:64]  - RINGID
7240  *            CP:
7241  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7242  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7243  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7244  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7245  *            PIPE_ID - ME0 0=3D
7246  *                    - ME1&2 compute dispatcher (4 pipes each)
7247  *            SDMA:
7248  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7249  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7250  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7251  * [79:72]  - VMID
7252  * [95:80]  - PASID
7253  * [127:96] - reserved
7254  */
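
/*
 * Illustrative sketch only, not used by the driver: decoding the fields of
 * one IV ring entry laid out as above.  The struct and helper names are
 * hypothetical; cik_irq_process() below extracts src_id, src_data and
 * ring_id the same way, but open-coded.
 */
struct cik_iv_entry_example {
        u8 src_id;      /* [7:0]   interrupt source id */
        u32 src_data;   /* [59:32] interrupt source data */
        u8 ring_id;     /* [71:64] ME_ID[1:0], PIPE_ID[1:0], QUEUE_ID[2:0] */
        u8 vmid;        /* [79:72] */
        u16 pasid;      /* [95:80] */
};

static inline void cik_decode_iv_entry_example(const volatile u32 *ring,
                                               u32 ring_index,
                                               struct cik_iv_entry_example *e)
{
        u32 dw0 = le32_to_cpu(ring[ring_index + 0]);
        u32 dw1 = le32_to_cpu(ring[ring_index + 1]);
        u32 dw2 = le32_to_cpu(ring[ring_index + 2]);
        /* dword 3 ([127:96]) is reserved */

        e->src_id   = dw0 & 0xff;
        e->src_data = dw1 & 0xfffffff;          /* 28 bits: [59:32] */
        e->ring_id  = dw2 & 0xff;
        e->vmid     = (dw2 >> 8) & 0xff;
        e->pasid    = (dw2 >> 16) & 0xffff;
}
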
7255 /**
7256  * cik_irq_process - interrupt handler
7257  *
7258  * @rdev: radeon_device pointer
7259  *
7260  * Interrupt handler (CIK).  Walk the IH ring,
7261  * ack interrupts and schedule work to handle
7262  * interrupt events.
7263  * Returns irq process return code.
7264  */
7265 int cik_irq_process(struct radeon_device *rdev)
7266 {
7267         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7268         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7269         u32 wptr;
7270         u32 rptr;
7271         u32 src_id, src_data, ring_id;
7272         u8 me_id, pipe_id, queue_id;
7273         u32 ring_index;
7274         bool queue_hotplug = false;
7275         bool queue_reset = false;
7276         u32 addr, status, mc_client;
7277         bool queue_thermal = false;
7278
7279         if (!rdev->ih.enabled || rdev->shutdown)
7280                 return IRQ_NONE;
7281
7282         wptr = cik_get_ih_wptr(rdev);
7283
7284 restart_ih:
7285         /* is somebody else already processing irqs? */
7286         if (atomic_xchg(&rdev->ih.lock, 1))
7287                 return IRQ_NONE;
7288
7289         rptr = rdev->ih.rptr;
7290         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7291
7292         /* Order reading of wptr vs. reading of IH ring data */
7293         rmb();
7294
7295         /* display interrupts */
7296         cik_irq_ack(rdev);
7297
7298         while (rptr != wptr) {
7299                 /* wptr/rptr are in bytes! */
7300                 ring_index = rptr / 4;
7301                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7302                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7303                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7304
7305                 switch (src_id) {
7306                 case 1: /* D1 vblank/vline */
7307                         switch (src_data) {
7308                         case 0: /* D1 vblank */
7309                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7310                                         if (rdev->irq.crtc_vblank_int[0]) {
7311                                                 drm_handle_vblank(rdev->ddev, 0);
7312                                                 rdev->pm.vblank_sync = true;
7313                                                 wake_up(&rdev->irq.vblank_queue);
7314                                         }
7315                                         if (atomic_read(&rdev->irq.pflip[0]))
7316                                                 radeon_crtc_handle_flip(rdev, 0);
7317                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7318                                         DRM_DEBUG("IH: D1 vblank\n");
7319                                 }
7320                                 break;
7321                         case 1: /* D1 vline */
7322                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7323                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7324                                         DRM_DEBUG("IH: D1 vline\n");
7325                                 }
7326                                 break;
7327                         default:
7328                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7329                                 break;
7330                         }
7331                         break;
7332                 case 2: /* D2 vblank/vline */
7333                         switch (src_data) {
7334                         case 0: /* D2 vblank */
7335                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7336                                         if (rdev->irq.crtc_vblank_int[1]) {
7337                                                 drm_handle_vblank(rdev->ddev, 1);
7338                                                 rdev->pm.vblank_sync = true;
7339                                                 wake_up(&rdev->irq.vblank_queue);
7340                                         }
7341                                         if (atomic_read(&rdev->irq.pflip[1]))
7342                                                 radeon_crtc_handle_flip(rdev, 1);
7343                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7344                                         DRM_DEBUG("IH: D2 vblank\n");
7345                                 }
7346                                 break;
7347                         case 1: /* D2 vline */
7348                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7349                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7350                                         DRM_DEBUG("IH: D2 vline\n");
7351                                 }
7352                                 break;
7353                         default:
7354                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7355                                 break;
7356                         }
7357                         break;
7358                 case 3: /* D3 vblank/vline */
7359                         switch (src_data) {
7360                         case 0: /* D3 vblank */
7361                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7362                                         if (rdev->irq.crtc_vblank_int[2]) {
7363                                                 drm_handle_vblank(rdev->ddev, 2);
7364                                                 rdev->pm.vblank_sync = true;
7365                                                 wake_up(&rdev->irq.vblank_queue);
7366                                         }
7367                                         if (atomic_read(&rdev->irq.pflip[2]))
7368                                                 radeon_crtc_handle_flip(rdev, 2);
7369                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7370                                         DRM_DEBUG("IH: D3 vblank\n");
7371                                 }
7372                                 break;
7373                         case 1: /* D3 vline */
7374                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7375                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7376                                         DRM_DEBUG("IH: D3 vline\n");
7377                                 }
7378                                 break;
7379                         default:
7380                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7381                                 break;
7382                         }
7383                         break;
7384                 case 4: /* D4 vblank/vline */
7385                         switch (src_data) {
7386                         case 0: /* D4 vblank */
7387                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7388                                         if (rdev->irq.crtc_vblank_int[3]) {
7389                                                 drm_handle_vblank(rdev->ddev, 3);
7390                                                 rdev->pm.vblank_sync = true;
7391                                                 wake_up(&rdev->irq.vblank_queue);
7392                                         }
7393                                         if (atomic_read(&rdev->irq.pflip[3]))
7394                                                 radeon_crtc_handle_flip(rdev, 3);
7395                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7396                                         DRM_DEBUG("IH: D4 vblank\n");
7397                                 }
7398                                 break;
7399                         case 1: /* D4 vline */
7400                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7401                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7402                                         DRM_DEBUG("IH: D4 vline\n");
7403                                 }
7404                                 break;
7405                         default:
7406                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7407                                 break;
7408                         }
7409                         break;
7410                 case 5: /* D5 vblank/vline */
7411                         switch (src_data) {
7412                         case 0: /* D5 vblank */
7413                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7414                                         if (rdev->irq.crtc_vblank_int[4]) {
7415                                                 drm_handle_vblank(rdev->ddev, 4);
7416                                                 rdev->pm.vblank_sync = true;
7417                                                 wake_up(&rdev->irq.vblank_queue);
7418                                         }
7419                                         if (atomic_read(&rdev->irq.pflip[4]))
7420                                                 radeon_crtc_handle_flip(rdev, 4);
7421                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7422                                         DRM_DEBUG("IH: D5 vblank\n");
7423                                 }
7424                                 break;
7425                         case 1: /* D5 vline */
7426                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7427                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7428                                         DRM_DEBUG("IH: D5 vline\n");
7429                                 }
7430                                 break;
7431                         default:
7432                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7433                                 break;
7434                         }
7435                         break;
7436                 case 6: /* D6 vblank/vline */
7437                         switch (src_data) {
7438                         case 0: /* D6 vblank */
7439                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7440                                         if (rdev->irq.crtc_vblank_int[5]) {
7441                                                 drm_handle_vblank(rdev->ddev, 5);
7442                                                 rdev->pm.vblank_sync = true;
7443                                                 wake_up(&rdev->irq.vblank_queue);
7444                                         }
7445                                         if (atomic_read(&rdev->irq.pflip[5]))
7446                                                 radeon_crtc_handle_flip(rdev, 5);
7447                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7448                                         DRM_DEBUG("IH: D6 vblank\n");
7449                                 }
7450                                 break;
7451                         case 1: /* D6 vline */
7452                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7453                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7454                                         DRM_DEBUG("IH: D6 vline\n");
7455                                 }
7456                                 break;
7457                         default:
7458                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7459                                 break;
7460                         }
7461                         break;
7462                 case 42: /* HPD hotplug */
7463                         switch (src_data) {
7464                         case 0:
7465                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7466                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7467                                         queue_hotplug = true;
7468                                         DRM_DEBUG("IH: HPD1\n");
7469                                 }
7470                                 break;
7471                         case 1:
7472                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7473                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7474                                         queue_hotplug = true;
7475                                         DRM_DEBUG("IH: HPD2\n");
7476                                 }
7477                                 break;
7478                         case 2:
7479                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7480                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7481                                         queue_hotplug = true;
7482                                         DRM_DEBUG("IH: HPD3\n");
7483                                 }
7484                                 break;
7485                         case 3:
7486                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7487                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7488                                         queue_hotplug = true;
7489                                         DRM_DEBUG("IH: HPD4\n");
7490                                 }
7491                                 break;
7492                         case 4:
7493                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7494                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7495                                         queue_hotplug = true;
7496                                         DRM_DEBUG("IH: HPD5\n");
7497                                 }
7498                                 break;
7499                         case 5:
7500                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7501                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7502                                         queue_hotplug = true;
7503                                         DRM_DEBUG("IH: HPD6\n");
7504                                 }
7505                                 break;
7506                         default:
7507                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7508                                 break;
7509                         }
7510                         break;
7511                 case 124: /* UVD */
7512                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7513                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7514                         break;
7515                 case 146:
7516                 case 147:
7517                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7518                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7519                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7520                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7521                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7522                                 addr);
7523                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7524                                 status);
7525                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7526                         /* reset addr and status */
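                        /*
                         * WREG32_P() is a read-modify-write: the ~1 mask preserves
                         * every bit except bit 0, and the value 1 then sets bit 0,
                         * which clears the latched fault address/status registers
                         * read above.
                         */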
7527                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7528                         break;
7529                 case 167: /* VCE */
7530                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7531                         switch (src_data) {
7532                         case 0:
7533                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7534                                 break;
7535                         case 1:
7536                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7537                                 break;
7538                         default:
7539                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7540                                 break;
7541                         }
7542                         break;
7543                 case 176: /* GFX RB CP_INT */
7544                 case 177: /* GFX IB CP_INT */
7545                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7546                         break;
7547                 case 181: /* CP EOP event */
7548                         DRM_DEBUG("IH: CP EOP\n");
7549                         /* XXX check the bitfield order! */
7550                         me_id = (ring_id & 0x60) >> 5;
7551                         pipe_id = (ring_id & 0x18) >> 3;
7552                         queue_id = (ring_id & 0x7) >> 0;
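                        /*
                         * Decode sketch, assuming the masks above match the hw
                         * layout: ring_id = 0x2b gives me_id = (0x2b & 0x60) >> 5 = 1,
                         * pipe_id = (0x2b & 0x18) >> 3 = 1 and queue_id = 0x2b & 0x7 = 3,
                         * i.e. MEC1, pipe 1, queue 3.
                         */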
7553                         switch (me_id) {
7554                         case 0:
7555                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7556                                 break;
7557                         case 1:
7558                         case 2:
7559                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7560                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7561                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7562                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7563                                 break;
7564                         }
7565                         break;
7566                 case 184: /* CP Privileged reg access */
7567                         DRM_ERROR("Illegal register access in command stream\n");
7568                         /* XXX check the bitfield order! */
7569                         me_id = (ring_id & 0x60) >> 5;
7570                         pipe_id = (ring_id & 0x18) >> 3;
7571                         queue_id = (ring_id & 0x7) >> 0;
7572                         switch (me_id) {
7573                         case 0:
7574                                 /* This results in a full GPU reset, but all we need to do is soft
7575                                  * reset the CP for gfx
7576                                  */
7577                                 queue_reset = true;
7578                                 break;
7579                         case 1:
7580                                 /* XXX compute */
7581                                 queue_reset = true;
7582                                 break;
7583                         case 2:
7584                                 /* XXX compute */
7585                                 queue_reset = true;
7586                                 break;
7587                         }
7588                         break;
7589                 case 185: /* CP Privileged inst */
7590                         DRM_ERROR("Illegal instruction in command stream\n");
7591                         /* XXX check the bitfield order! */
7592                         me_id = (ring_id & 0x60) >> 5;
7593                         pipe_id = (ring_id & 0x18) >> 3;
7594                         queue_id = (ring_id & 0x7) >> 0;
7595                         switch (me_id) {
7596                         case 0:
7597                                 /* This results in a full GPU reset, but all we need to do is soft
7598                                  * reset the CP for gfx
7599                                  */
7600                                 queue_reset = true;
7601                                 break;
7602                         case 1:
7603                                 /* XXX compute */
7604                                 queue_reset = true;
7605                                 break;
7606                         case 2:
7607                                 /* XXX compute */
7608                                 queue_reset = true;
7609                                 break;
7610                         }
7611                         break;
7612                 case 224: /* SDMA trap event */
7613                         /* XXX check the bitfield order! */
7614                         me_id = (ring_id & 0x3) >> 0;
7615                         queue_id = (ring_id & 0xc) >> 2;
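                        /*
                         * Decode sketch, assuming the same ring_id layout hinted at
                         * by the comment above: ring_id = 0x5 gives me_id = 0x5 & 0x3 = 1
                         * (SDMA1) and queue_id = (0x5 & 0xc) >> 2 = 1.
                         */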
7616                         DRM_DEBUG("IH: SDMA trap\n");
7617                         switch (me_id) {
7618                         case 0:
7619                                 switch (queue_id) {
7620                                 case 0:
7621                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7622                                         break;
7623                                 case 1:
7624                                         /* XXX compute */
7625                                         break;
7626                                 case 2:
7627                                         /* XXX compute */
7628                                         break;
7629                                 }
7630                                 break;
7631                         case 1:
7632                                 switch (queue_id) {
7633                                 case 0:
7634                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7635                                         break;
7636                                 case 1:
7637                                         /* XXX compute */
7638                                         break;
7639                                 case 2:
7640                                         /* XXX compute */
7641                                         break;
7642                                 }
7643                                 break;
7644                         }
7645                         break;
7646                 case 230: /* thermal low to high */
7647                         DRM_DEBUG("IH: thermal low to high\n");
7648                         rdev->pm.dpm.thermal.high_to_low = false;
7649                         queue_thermal = true;
7650                         break;
7651                 case 231: /* thermal high to low */
7652                         DRM_DEBUG("IH: thermal high to low\n");
7653                         rdev->pm.dpm.thermal.high_to_low = true;
7654                         queue_thermal = true;
7655                         break;
7656                 case 233: /* GUI IDLE */
7657                         DRM_DEBUG("IH: GUI idle\n");
7658                         break;
7659                 case 241: /* SDMA Privileged inst */
7660                 case 247: /* SDMA Privileged inst */
7661                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7662                         /* XXX check the bitfield order! */
7663                         me_id = (ring_id & 0x3) >> 0;
7664                         queue_id = (ring_id & 0xc) >> 2;
7665                         switch (me_id) {
7666                         case 0:
7667                                 switch (queue_id) {
7668                                 case 0:
7669                                         queue_reset = true;
7670                                         break;
7671                                 case 1:
7672                                         /* XXX compute */
7673                                         queue_reset = true;
7674                                         break;
7675                                 case 2:
7676                                         /* XXX compute */
7677                                         queue_reset = true;
7678                                         break;
7679                                 }
7680                                 break;
7681                         case 1:
7682                                 switch (queue_id) {
7683                                 case 0:
7684                                         queue_reset = true;
7685                                         break;
7686                                 case 1:
7687                                         /* XXX compute */
7688                                         queue_reset = true;
7689                                         break;
7690                                 case 2:
7691                                         /* XXX compute */
7692                                         queue_reset = true;
7693                                         break;
7694                                 }
7695                                 break;
7696                         }
7697                         break;
7698                 default:
7699                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7700                         break;
7701                 }
7702
7703                 /* wptr/rptr are in bytes! */
7704                 rptr += 16;
7705                 rptr &= rdev->ih.ptr_mask;
7706         }
7707         if (queue_hotplug)
7708                 schedule_work(&rdev->hotplug_work);
7709         if (queue_reset)
7710                 schedule_work(&rdev->reset_work);
7711         if (queue_thermal)
7712                 schedule_work(&rdev->pm.dpm.thermal.work);
7713         rdev->ih.rptr = rptr;
7714         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7715         atomic_set(&rdev->ih.lock, 0);
7716
7717         /* make sure wptr hasn't changed while processing; pick up entries the hw wrote after the last fetch instead of waiting for the next interrupt */
7718         wptr = cik_get_ih_wptr(rdev);
7719         if (wptr != rptr)
7720                 goto restart_ih;
7721
7722         return IRQ_HANDLED;
7723 }
7724
7725 /*
7726  * startup/shutdown callbacks
7727  */
7728 /**
7729  * cik_startup - program the asic to a functional state
7730  *
7731  * @rdev: radeon_device pointer
7732  *
7733  * Programs the asic to a functional state (CIK).
7734  * Called by cik_init() and cik_resume().
7735  * Returns 0 for success, error for failure.
7736  */
7737 static int cik_startup(struct radeon_device *rdev)
7738 {
7739         struct radeon_ring *ring;
7740         int r;
7741
7742         /* enable pcie gen2/3 link */
7743         cik_pcie_gen3_enable(rdev);
7744         /* enable aspm */
7745         cik_program_aspm(rdev);
7746
7747         /* scratch needs to be initialized before MC */
7748         r = r600_vram_scratch_init(rdev);
7749         if (r)
7750                 return r;
7751
7752         cik_mc_program(rdev);
7753
7754         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7755                 r = ci_mc_load_microcode(rdev);
7756                 if (r) {
7757                         DRM_ERROR("Failed to load MC firmware!\n");
7758                         return r;
7759                 }
7760         }
7761
7762         r = cik_pcie_gart_enable(rdev);
7763         if (r)
7764                 return r;
7765         cik_gpu_init(rdev);
7766
7767         /* allocate rlc buffers */
7768         if (rdev->flags & RADEON_IS_IGP) {
7769                 if (rdev->family == CHIP_KAVERI) {
7770                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7771                         rdev->rlc.reg_list_size =
7772                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7773                 } else {
7774                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7775                         rdev->rlc.reg_list_size =
7776                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7777                 }
7778         }
7779         rdev->rlc.cs_data = ci_cs_data;
7780         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7781         r = sumo_rlc_init(rdev);
7782         if (r) {
7783                 DRM_ERROR("Failed to init rlc BOs!\n");
7784                 return r;
7785         }
7786
7787         /* allocate wb buffer */
7788         r = radeon_wb_init(rdev);
7789         if (r)
7790                 return r;
7791
7792         /* allocate mec buffers */
7793         r = cik_mec_init(rdev);
7794         if (r) {
7795                 DRM_ERROR("Failed to init MEC BOs!\n");
7796                 return r;
7797         }
7798
7799         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7800         if (r) {
7801                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7802                 return r;
7803         }
7804
7805         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7806         if (r) {
7807                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7808                 return r;
7809         }
7810
7811         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7812         if (r) {
7813                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7814                 return r;
7815         }
7816
7817         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7818         if (r) {
7819                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7820                 return r;
7821         }
7822
7823         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7824         if (r) {
7825                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7826                 return r;
7827         }
7828
7829         r = radeon_uvd_resume(rdev);
7830         if (!r) {
7831                 r = uvd_v4_2_resume(rdev);
7832                 if (!r) {
7833                         r = radeon_fence_driver_start_ring(rdev,
7834                                                            R600_RING_TYPE_UVD_INDEX);
7835                         if (r)
7836                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7837                 }
7838         }
7839         if (r)
7840                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
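        /* a zero ring_size makes the UVD ring setup further down a no-op, so
         * a UVD init failure degrades to "no UVD" instead of failing startup
         */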
7841
7842         r = radeon_vce_resume(rdev);
7843         if (!r) {
7844                 r = vce_v2_0_resume(rdev);
7845                 if (!r)
7846                         r = radeon_fence_driver_start_ring(rdev,
7847                                                            TN_RING_TYPE_VCE1_INDEX);
7848                 if (!r)
7849                         r = radeon_fence_driver_start_ring(rdev,
7850                                                            TN_RING_TYPE_VCE2_INDEX);
7851         }
7852         if (r) {
7853                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
7854                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
7855                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
7856         }
7857
7858         /* Enable IRQ */
7859         if (!rdev->irq.installed) {
7860                 r = radeon_irq_kms_init(rdev);
7861                 if (r)
7862                         return r;
7863         }
7864
7865         r = cik_irq_init(rdev);
7866         if (r) {
7867                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7868                 radeon_irq_kms_fini(rdev);
7869                 return r;
7870         }
7871         cik_irq_set(rdev);
7872
7873         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7874         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7875                              PACKET3(PACKET3_NOP, 0x3FFF));
7876         if (r)
7877                 return r;
7878
7879         /* set up the compute queues */
7880         /* type-2 packets are deprecated on MEC, use type-3 instead */
7881         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7882         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7883                              PACKET3(PACKET3_NOP, 0x3FFF));
7884         if (r)
7885                 return r;
7886         ring->me = 1; /* first MEC */
7887         ring->pipe = 0; /* first pipe */
7888         ring->queue = 0; /* first queue */
7889         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7890
7891         /* type-2 packets are deprecated on MEC, use type-3 instead */
7892         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7893         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7894                              PACKET3(PACKET3_NOP, 0x3FFF));
7895         if (r)
7896                 return r;
7897         /* dGPUs only have 1 MEC */
7898         ring->me = 1; /* first MEC */
7899         ring->pipe = 0; /* first pipe */
7900         ring->queue = 1; /* second queue */
7901         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
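        /* with the values above both compute rings live on MEC1, pipe 0,
         * queues 0 and 1, which is what the ring_id decode in the interrupt
         * handler matches against when routing compute EOP interrupts
         */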
7902
7903         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7904         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7905                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7906         if (r)
7907                 return r;
7908
7909         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7910         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7911                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7912         if (r)
7913                 return r;
7914
7915         r = cik_cp_resume(rdev);
7916         if (r)
7917                 return r;
7918
7919         r = cik_sdma_resume(rdev);
7920         if (r)
7921                 return r;
7922
7923         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7924         if (ring->ring_size) {
7925                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7926                                      RADEON_CP_PACKET2);
7927                 if (!r)
7928                         r = uvd_v1_0_init(rdev);
7929                 if (r)
7930                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7931         }
7932
7933         r = -ENOENT;
7934
7935         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7936         if (ring->ring_size)
7937                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7938                                      VCE_CMD_NO_OP);
7939
7940         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7941         if (ring->ring_size)
7942                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7943                                      VCE_CMD_NO_OP);
7944
7945         if (!r)
7946                 r = vce_v1_0_init(rdev);
7947         else if (r != -ENOENT)
7948                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
7949
7950         r = radeon_ib_pool_init(rdev);
7951         if (r) {
7952                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7953                 return r;
7954         }
7955
7956         r = radeon_vm_manager_init(rdev);
7957         if (r) {
7958                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7959                 return r;
7960         }
7961
7962         r = dce6_audio_init(rdev);
7963         if (r)
7964                 return r;
7965
7966         return 0;
7967 }
7968
7969 /**
7970  * cik_resume - resume the asic to a functional state
7971  *
7972  * @rdev: radeon_device pointer
7973  *
7974  * Programs the asic to a functional state (CIK).
7975  * Called at resume.
7976  * Returns 0 for success, error for failure.
7977  */
7978 int cik_resume(struct radeon_device *rdev)
7979 {
7980         int r;
7981
7982         /* post card */
7983         atom_asic_init(rdev->mode_info.atom_context);
7984
7985         /* init golden registers */
7986         cik_init_golden_registers(rdev);
7987
7988         if (rdev->pm.pm_method == PM_METHOD_DPM)
7989                 radeon_pm_resume(rdev);
7990
7991         rdev->accel_working = true;
7992         r = cik_startup(rdev);
7993         if (r) {
7994                 DRM_ERROR("cik startup failed on resume\n");
7995                 rdev->accel_working = false;
7996                 return r;
7997         }
7998
7999         return r;
8000
8001 }
8002
8003 /**
8004  * cik_suspend - suspend the asic
8005  *
8006  * @rdev: radeon_device pointer
8007  *
8008  * Bring the chip into a state suitable for suspend (CIK).
8009  * Called at suspend.
8010  * Returns 0 for success.
8011  */
8012 int cik_suspend(struct radeon_device *rdev)
8013 {
8014         radeon_pm_suspend(rdev);
8015         dce6_audio_fini(rdev);
8016         radeon_vm_manager_fini(rdev);
8017         cik_cp_enable(rdev, false);
8018         cik_sdma_enable(rdev, false);
8019         uvd_v1_0_fini(rdev);
8020         radeon_uvd_suspend(rdev);
8021         radeon_vce_suspend(rdev);
8022         cik_fini_pg(rdev);
8023         cik_fini_cg(rdev);
8024         cik_irq_suspend(rdev);
8025         radeon_wb_disable(rdev);
8026         cik_pcie_gart_disable(rdev);
8027         return 0;
8028 }
8029
8030 /* The plan is to move initialization into this function and use
8031  * helper functions so that radeon_device_init pretty much
8032  * does nothing more than call the asic specific functions. This
8033  * should also allow us to remove a bunch of callback functions
8034  * like vram_info.
8035  */
8036 /**
8037  * cik_init - asic specific driver and hw init
8038  *
8039  * @rdev: radeon_device pointer
8040  *
8041  * Setup asic specific driver variables and program the hw
8042  * to a functional state (CIK).
8043  * Called at driver startup.
8044  * Returns 0 for success, errors for failure.
8045  */
8046 int cik_init(struct radeon_device *rdev)
8047 {
8048         struct radeon_ring *ring;
8049         int r;
8050
8051         /* Read BIOS */
8052         if (!radeon_get_bios(rdev)) {
8053                 if (ASIC_IS_AVIVO(rdev))
8054                         return -EINVAL;
8055         }
8056         /* Must be an ATOMBIOS */
8057         if (!rdev->is_atom_bios) {
8058                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8059                 return -EINVAL;
8060         }
8061         r = radeon_atombios_init(rdev);
8062         if (r)
8063                 return r;
8064
8065         /* Post card if necessary */
8066         if (!radeon_card_posted(rdev)) {
8067                 if (!rdev->bios) {
8068                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8069                         return -EINVAL;
8070                 }
8071                 DRM_INFO("GPU not posted. posting now...\n");
8072                 atom_asic_init(rdev->mode_info.atom_context);
8073         }
8074         /* init golden registers */
8075         cik_init_golden_registers(rdev);
8076         /* Initialize scratch registers */
8077         cik_scratch_init(rdev);
8078         /* Initialize surface registers */
8079         radeon_surface_init(rdev);
8080         /* Initialize clocks */
8081         radeon_get_clock_info(rdev->ddev);
8082
8083         /* Fence driver */
8084         r = radeon_fence_driver_init(rdev);
8085         if (r)
8086                 return r;
8087
8088         /* initialize memory controller */
8089         r = cik_mc_init(rdev);
8090         if (r)
8091                 return r;
8092         /* Memory manager */
8093         r = radeon_bo_init(rdev);
8094         if (r)
8095                 return r;
8096
8097         if (rdev->flags & RADEON_IS_IGP) {
8098                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8099                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8100                         r = cik_init_microcode(rdev);
8101                         if (r) {
8102                                 DRM_ERROR("Failed to load firmware!\n");
8103                                 return r;
8104                         }
8105                 }
8106         } else {
8107                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8108                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8109                     !rdev->mc_fw) {
8110                         r = cik_init_microcode(rdev);
8111                         if (r) {
8112                                 DRM_ERROR("Failed to load firmware!\n");
8113                                 return r;
8114                         }
8115                 }
8116         }
8117
8118         /* Initialize power management */
8119         radeon_pm_init(rdev);
8120
8121         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8122         ring->ring_obj = NULL;
8123         r600_ring_init(rdev, ring, 1024 * 1024);
8124
8125         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8126         ring->ring_obj = NULL;
8127         r600_ring_init(rdev, ring, 1024 * 1024);
8128         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8129         if (r)
8130                 return r;
8131
8132         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8133         ring->ring_obj = NULL;
8134         r600_ring_init(rdev, ring, 1024 * 1024);
8135         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8136         if (r)
8137                 return r;
8138
8139         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8140         ring->ring_obj = NULL;
8141         r600_ring_init(rdev, ring, 256 * 1024);
8142
8143         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8144         ring->ring_obj = NULL;
8145         r600_ring_init(rdev, ring, 256 * 1024);
8146
8147         r = radeon_uvd_init(rdev);
8148         if (!r) {
8149                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8150                 ring->ring_obj = NULL;
8151                 r600_ring_init(rdev, ring, 4096);
8152         }
8153
8154         r = radeon_vce_init(rdev);
8155         if (!r) {
8156                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8157                 ring->ring_obj = NULL;
8158                 r600_ring_init(rdev, ring, 4096);
8159
8160                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8161                 ring->ring_obj = NULL;
8162                 r600_ring_init(rdev, ring, 4096);
8163         }
8164
8165         rdev->ih.ring_obj = NULL;
8166         r600_ih_ring_init(rdev, 64 * 1024);
8167
8168         r = r600_pcie_gart_init(rdev);
8169         if (r)
8170                 return r;
8171
8172         rdev->accel_working = true;
8173         r = cik_startup(rdev);
8174         if (r) {
8175                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8176                 cik_cp_fini(rdev);
8177                 cik_sdma_fini(rdev);
8178                 cik_irq_fini(rdev);
8179                 sumo_rlc_fini(rdev);
8180                 cik_mec_fini(rdev);
8181                 radeon_wb_fini(rdev);
8182                 radeon_ib_pool_fini(rdev);
8183                 radeon_vm_manager_fini(rdev);
8184                 radeon_irq_kms_fini(rdev);
8185                 cik_pcie_gart_fini(rdev);
8186                 rdev->accel_working = false;
8187         }
8188
8189         /* Don't start up if the MC ucode is missing.
8190          * The default clocks and voltages before the MC ucode
8191          * is loaded are not sufficient for advanced operations.
8192          */
8193         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8194                 DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8195                 return -EINVAL;
8196         }
8197
8198         return 0;
8199 }
8200
8201 /**
8202  * cik_fini - asic specific driver and hw fini
8203  *
8204  * @rdev: radeon_device pointer
8205  *
8206  * Tear down the asic specific driver variables and program the hw
8207  * to an idle state (CIK).
8208  * Called at driver unload.
8209  */
8210 void cik_fini(struct radeon_device *rdev)
8211 {
8212         radeon_pm_fini(rdev);
8213         cik_cp_fini(rdev);
8214         cik_sdma_fini(rdev);
8215         cik_fini_pg(rdev);
8216         cik_fini_cg(rdev);
8217         cik_irq_fini(rdev);
8218         sumo_rlc_fini(rdev);
8219         cik_mec_fini(rdev);
8220         radeon_wb_fini(rdev);
8221         radeon_vm_manager_fini(rdev);
8222         radeon_ib_pool_fini(rdev);
8223         radeon_irq_kms_fini(rdev);
8224         uvd_v1_0_fini(rdev);
8225         radeon_uvd_fini(rdev);
8226         radeon_vce_fini(rdev);
8227         cik_pcie_gart_fini(rdev);
8228         r600_vram_scratch_fini(rdev);
8229         radeon_gem_fini(rdev);
8230         radeon_fence_driver_fini(rdev);
8231         radeon_bo_fini(rdev);
8232         radeon_atombios_fini(rdev);
8233         kfree(rdev->bios);
8234         rdev->bios = NULL;
8235 }
8236
8237 void dce8_program_fmt(struct drm_encoder *encoder)
8238 {
8239         struct drm_device *dev = encoder->dev;
8240         struct radeon_device *rdev = dev->dev_private;
8241         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8242         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8243         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8244         int bpc = 0;
8245         u32 tmp = 0;
8246         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8247
8248         if (connector) {
8249                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8250                 bpc = radeon_get_monitor_bpc(connector);
8251                 dither = radeon_connector->dither;
8252         }
8253
8254         /* LVDS/eDP FMT is set up by atom */
8255         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8256                 return;
8257
8258         /* not needed for analog */
8259         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8260             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8261                 return;
8262
8263         if (bpc == 0)
8264                 return;
8265
8266         switch (bpc) {
8267         case 6:
8268                 if (dither == RADEON_FMT_DITHER_ENABLE)
8269                         /* XXX sort out optimal dither settings */
8270                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8271                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8272                 else
8273                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8274                 break;
8275         case 8:
8276                 if (dither == RADEON_FMT_DITHER_ENABLE)
8277                         /* XXX sort out optimal dither settings */
8278                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8279                                 FMT_RGB_RANDOM_ENABLE |
8280                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8281                 else
8282                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8283                 break;
8284         case 10:
8285                 if (dither == RADEON_FMT_DITHER_ENABLE)
8286                         /* XXX sort out optimal dither settings */
8287                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8288                                 FMT_RGB_RANDOM_ENABLE |
8289                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8290                 else
8291                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8292                 break;
8293         default:
8294                 /* not needed */
8295                 break;
8296         }
8297
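        /* e.g. an 8 bpc panel with dithering enabled ends up with spatial and
         * frame random dithering at depth 1, while the same panel without
         * dithering is simply truncated to 8 bits
         */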
8298         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8299 }
8300
8301 /* display watermark setup */
8302 /**
8303  * dce8_line_buffer_adjust - Set up the line buffer
8304  *
8305  * @rdev: radeon_device pointer
8306  * @radeon_crtc: the selected display controller
8307  * @mode: the current display mode on the selected display
8308  * controller
8309  *
8310  * Set up the line buffer allocation for
8311  * the selected display controller (CIK).
8312  * Returns the line buffer size in pixels.
8313  */
8314 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8315                                    struct radeon_crtc *radeon_crtc,
8316                                    struct drm_display_mode *mode)
8317 {
8318         u32 tmp, buffer_alloc, i;
8319         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8320         /*
8321          * Line Buffer Setup
8322          * There are 6 line buffers, one for each display controller.
8323          * There are 3 partitions per LB. Select the number of partitions
8324          * to enable based on the display width.  For display widths larger
8325          * than 4096, you need to use 2 display controllers and combine
8326          * them using the stereo blender.
8327          */
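        /*
         * Worked example (a sketch, not taken from the hw docs): a 1920 wide
         * mode is not < 1920 but is < 2560, so tmp = 2 and buffer_alloc = 2,
         * and the function reports 2560 * 2 pixels of line buffer below.
         */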
8328         if (radeon_crtc->base.enabled && mode) {
8329                 if (mode->crtc_hdisplay < 1920) {
8330                         tmp = 1;
8331                         buffer_alloc = 2;
8332                 } else if (mode->crtc_hdisplay < 2560) {
8333                         tmp = 2;
8334                         buffer_alloc = 2;
8335                 } else if (mode->crtc_hdisplay < 4096) {
8336                         tmp = 0;
8337                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8338                 } else {
8339                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8340                         tmp = 0;
8341                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8342                 }
8343         } else {
8344                 tmp = 1;
8345                 buffer_alloc = 0;
8346         }
8347
8348         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8349                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8350
8351         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8352                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8353         for (i = 0; i < rdev->usec_timeout; i++) {
8354                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8355                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8356                         break;
8357                 udelay(1);
8358         }
8359
8360         if (radeon_crtc->base.enabled && mode) {
8361                 switch (tmp) {
8362                 case 0:
8363                 default:
8364                         return 4096 * 2;
8365                 case 1:
8366                         return 1920 * 2;
8367                 case 2:
8368                         return 2560 * 2;
8369                 }
8370         }
8371
8372         /* controller not enabled, so no lb used */
8373         return 0;
8374 }
8375
8376 /**
8377  * cik_get_number_of_dram_channels - get the number of dram channels
8378  *
8379  * @rdev: radeon_device pointer
8380  *
8381  * Look up the number of video ram channels (CIK).
8382  * Used for display watermark bandwidth calculations
8383  * Returns the number of dram channels
8384  */
8385 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8386 {
8387         u32 tmp = RREG32(MC_SHARED_CHMAP);
8388
8389         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8390         case 0:
8391         default:
8392                 return 1;
8393         case 1:
8394                 return 2;
8395         case 2:
8396                 return 4;
8397         case 3:
8398                 return 8;
8399         case 4:
8400                 return 3;
8401         case 5:
8402                 return 6;
8403         case 6:
8404                 return 10;
8405         case 7:
8406                 return 12;
8407         case 8:
8408                 return 16;
8409         }
8410 }
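
/*
 * For example, a NOOFCHAN field of 2 reports 4 channels; each channel is
 * treated as 32 bits wide (hence the "* 4" bytes used by the dram bandwidth
 * helpers below).
 */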
8411
8412 struct dce8_wm_params {
8413         u32 dram_channels; /* number of dram channels */
8414         u32 yclk;          /* bandwidth per dram data pin in kHz */
8415         u32 sclk;          /* engine clock in kHz */
8416         u32 disp_clk;      /* display clock in kHz */
8417         u32 src_width;     /* viewport width */
8418         u32 active_time;   /* active display time in ns */
8419         u32 blank_time;    /* blank time in ns */
8420         bool interlaced;    /* mode is interlaced */
8421         fixed20_12 vsc;    /* vertical scale ratio */
8422         u32 num_heads;     /* number of active crtcs */
8423         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8424         u32 lb_size;       /* line buffer allocated to pipe */
8425         u32 vtaps;         /* vertical scaler taps */
8426 };
8427
8428 /**
8429  * dce8_dram_bandwidth - get the dram bandwidth
8430  *
8431  * @wm: watermark calculation data
8432  *
8433  * Calculate the raw dram bandwidth (CIK).
8434  * Used for display watermark bandwidth calculations
8435  * Returns the dram bandwidth in MBytes/s
8436  */
8437 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8438 {
8439         /* Calculate raw DRAM Bandwidth */
8440         fixed20_12 dram_efficiency; /* 0.7 */
8441         fixed20_12 yclk, dram_channels, bandwidth;
8442         fixed20_12 a;
8443
8444         a.full = dfixed_const(1000);
8445         yclk.full = dfixed_const(wm->yclk);
8446         yclk.full = dfixed_div(yclk, a);
8447         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8448         a.full = dfixed_const(10);
8449         dram_efficiency.full = dfixed_const(7);
8450         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8451         bandwidth.full = dfixed_mul(dram_channels, yclk);
8452         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8453
8454         return dfixed_trunc(bandwidth);
8455 }
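
/*
 * Rough worked example for dce8_dram_bandwidth(), assuming yclk = 500000 kHz
 * (500 MHz effective) and 2 dram channels: 500 * (2 * 4) * 0.7 = 2800, i.e.
 * about 2800 MBytes/s of raw dram bandwidth.
 */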
8456
8457 /**
8458  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8459  *
8460  * @wm: watermark calculation data
8461  *
8462  * Calculate the dram bandwidth used for display (CIK).
8463  * Used for display watermark bandwidth calculations
8464  * Returns the dram bandwidth for display in MBytes/s
8465  */
8466 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8467 {
8468         /* Calculate DRAM Bandwidth and the part allocated to display. */
8469         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8470         fixed20_12 yclk, dram_channels, bandwidth;
8471         fixed20_12 a;
8472
8473         a.full = dfixed_const(1000);
8474         yclk.full = dfixed_const(wm->yclk);
8475         yclk.full = dfixed_div(yclk, a);
8476         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8477         a.full = dfixed_const(10);
8478         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8479         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8480         bandwidth.full = dfixed_mul(dram_channels, yclk);
8481         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8482
8483         return dfixed_trunc(bandwidth);
8484 }
8485
8486 /**
8487  * dce8_data_return_bandwidth - get the data return bandwidth
8488  *
8489  * @wm: watermark calculation data
8490  *
8491  * Calculate the data return bandwidth used for display (CIK).
8492  * Used for display watermark bandwidth calculations
8493  * Returns the data return bandwidth in MBytes/s
8494  */
8495 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8496 {
8497         /* Calculate the display Data return Bandwidth */
8498         fixed20_12 return_efficiency; /* 0.8 */
8499         fixed20_12 sclk, bandwidth;
8500         fixed20_12 a;
8501
8502         a.full = dfixed_const(1000);
8503         sclk.full = dfixed_const(wm->sclk);
8504         sclk.full = dfixed_div(sclk, a);
8505         a.full = dfixed_const(10);
8506         return_efficiency.full = dfixed_const(8);
8507         return_efficiency.full = dfixed_div(return_efficiency, a);
8508         a.full = dfixed_const(32);
8509         bandwidth.full = dfixed_mul(a, sclk);
8510         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8511
8512         return dfixed_trunc(bandwidth);
8513 }
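
/*
 * Rough worked example for dce8_data_return_bandwidth(), assuming
 * sclk = 800000 kHz: 32 * 800 * 0.8 = 20480 MBytes/s.
 */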
8514
8515 /**
8516  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8517  *
8518  * @wm: watermark calculation data
8519  *
8520  * Calculate the dmif bandwidth used for display (CIK).
8521  * Used for display watermark bandwidth calculations
8522  * Returns the dmif bandwidth in MBytes/s
8523  */
8524 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8525 {
8526         /* Calculate the DMIF Request Bandwidth */
8527         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8528         fixed20_12 disp_clk, bandwidth;
8529         fixed20_12 a, b;
8530
8531         a.full = dfixed_const(1000);
8532         disp_clk.full = dfixed_const(wm->disp_clk);
8533         disp_clk.full = dfixed_div(disp_clk, a);
8534         a.full = dfixed_const(32);
8535         b.full = dfixed_mul(a, disp_clk);
8536
8537         a.full = dfixed_const(10);
8538         disp_clk_request_efficiency.full = dfixed_const(8);
8539         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8540
8541         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8542
8543         return dfixed_trunc(bandwidth);
8544 }
8545
8546 /**
8547  * dce8_available_bandwidth - get the min available bandwidth
8548  *
8549  * @wm: watermark calculation data
8550  *
8551  * Calculate the min available bandwidth used for display (CIK).
8552  * Used for display watermark bandwidth calculations
8553  * Returns the min available bandwidth in MBytes/s
8554  */
8555 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8556 {
8557         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8558         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8559         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8560         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8561
8562         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8563 }
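
/*
 * With the sample figures above (2800 MBytes/s dram, 20480 MBytes/s data
 * return) and a dmif figure that also comes out above 2800,
 * dce8_available_bandwidth() would report the dram-limited 2800 MBytes/s.
 */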
8564
8565 /**
8566  * dce8_average_bandwidth - get the average available bandwidth
8567  *
8568  * @wm: watermark calculation data
8569  *
8570  * Calculate the average available bandwidth used for display (CIK).
8571  * Used for display watermark bandwidth calculations
8572  * Returns the average available bandwidth in MBytes/s
8573  */
8574 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8575 {
8576         /* Calculate the display mode Average Bandwidth
8577          * DisplayMode should contain the source and destination dimensions,
8578          * timing, etc.
8579          */
8580         fixed20_12 bpp;
8581         fixed20_12 line_time;
8582         fixed20_12 src_width;
8583         fixed20_12 bandwidth;
8584         fixed20_12 a;
8585
8586         a.full = dfixed_const(1000);
8587         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8588         line_time.full = dfixed_div(line_time, a);
8589         bpp.full = dfixed_const(wm->bytes_per_pixel);
8590         src_width.full = dfixed_const(wm->src_width);
8591         bandwidth.full = dfixed_mul(src_width, bpp);
8592         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8593         bandwidth.full = dfixed_div(bandwidth, line_time);
8594
8595         return dfixed_trunc(bandwidth);
8596 }
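
/*
 * Rough worked example for dce8_average_bandwidth(), assuming a 1920 wide,
 * 4 bytes-per-pixel scanout with no vertical scaling and a line time of
 * about 13200 ns: 1920 * 4 / 13.2 is roughly 580 MBytes/s.
 */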
8597
8598 /**
8599  * dce8_latency_watermark - get the latency watermark
8600  *
8601  * @wm: watermark calculation data
8602  *
8603  * Calculate the latency watermark (CIK).
8604  * Used for display watermark bandwidth calculations
8605  * Returns the latency watermark in ns
8606  */
8607 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8608 {
8609         /* First calculate the latency in ns */
8610         u32 mc_latency = 2000; /* 2000 ns. */
8611         u32 available_bandwidth = dce8_available_bandwidth(wm);
8612         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8613         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8614         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8615         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8616                 (wm->num_heads * cursor_line_pair_return_time);
8617         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8618         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8619         u32 tmp, dmif_size = 12288;
8620         fixed20_12 a, b, c;
8621
8622         if (wm->num_heads == 0)
8623                 return 0;
8624
8625         a.full = dfixed_const(2);
8626         b.full = dfixed_const(1);
8627         if ((wm->vsc.full > a.full) ||
8628             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8629             (wm->vtaps >= 5) ||
8630             ((wm->vsc.full >= a.full) && wm->interlaced))
8631                 max_src_lines_per_dst_line = 4;
8632         else
8633                 max_src_lines_per_dst_line = 2;
8634
8635         a.full = dfixed_const(available_bandwidth);
8636         b.full = dfixed_const(wm->num_heads);
8637         a.full = dfixed_div(a, b);
8638
8639         b.full = dfixed_const(mc_latency + 512);
8640         c.full = dfixed_const(wm->disp_clk);
8641         b.full = dfixed_div(b, c);
8642
8643         c.full = dfixed_const(dmif_size);
8644         b.full = dfixed_div(c, b);
8645
8646         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8647
8648         b.full = dfixed_const(1000);
8649         c.full = dfixed_const(wm->disp_clk);
8650         b.full = dfixed_div(c, b);
8651         c.full = dfixed_const(wm->bytes_per_pixel);
8652         b.full = dfixed_mul(b, c);
8653
8654         lb_fill_bw = min(tmp, dfixed_trunc(b));
8655
8656         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8657         b.full = dfixed_const(1000);
8658         c.full = dfixed_const(lb_fill_bw);
8659         b.full = dfixed_div(c, b);
8660         a.full = dfixed_div(a, b);
8661         line_fill_time = dfixed_trunc(a);
8662
8663         if (line_fill_time < wm->active_time)
8664                 return latency;
8665         else
8666                 return latency + (line_fill_time - wm->active_time);
8667
8668 }
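
/*
 * Illustrative numbers only, assuming 2800 MBytes/s available bandwidth, a
 * single head and a 148500 kHz display clock: worst_chunk_return_time is
 * 512 * 8 * 1000 / 2800 = ~1462 ns, the cursor term is ~182 ns and
 * dc_latency is 40000000 / 148500 = ~269 ns, so the base latency works out
 * to roughly 2000 + (2 * 1462 + 182) + 269 = ~5375 ns before the line fill
 * time check above.
 */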
8669
8670 /**
8671  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8672  * average and available dram bandwidth
8673  *
8674  * @wm: watermark calculation data
8675  *
8676  * Check if the display average bandwidth fits in the display
8677  * dram bandwidth (CIK).
8678  * Used for display watermark bandwidth calculations
8679  * Returns true if the display fits, false if not.
8680  */
8681 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8682 {
8683         if (dce8_average_bandwidth(wm) <=
8684             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8685                 return true;
8686         else
8687                 return false;
8688 }
8689
8690 /**
8691  * dce8_average_bandwidth_vs_available_bandwidth - check
8692  * average and available bandwidth
8693  *
8694  * @wm: watermark calculation data
8695  *
8696  * Check if the display average bandwidth fits in the display
8697  * available bandwidth (CIK).
8698  * Used for display watermark bandwidth calculations
8699  * Returns true if the display fits, false if not.
8700  */
8701 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8702 {
8703         if (dce8_average_bandwidth(wm) <=
8704             (dce8_available_bandwidth(wm) / wm->num_heads))
8705                 return true;
8706         else
8707                 return false;
8708 }
8709
8710 /**
8711  * dce8_check_latency_hiding - check latency hiding
8712  *
8713  * @wm: watermark calculation data
8714  *
8715  * Check latency hiding (CIK).
8716  * Used for display watermark bandwidth calculations
8717  * Returns true if the display fits, false if not.
8718  */
8719 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8720 {
8721         u32 lb_partitions = wm->lb_size / wm->src_width;
8722         u32 line_time = wm->active_time + wm->blank_time;
8723         u32 latency_tolerant_lines;
8724         u32 latency_hiding;
8725         fixed20_12 a;
8726
8727         a.full = dfixed_const(1);
8728         if (wm->vsc.full > a.full)
8729                 latency_tolerant_lines = 1;
8730         else {
8731                 if (lb_partitions <= (wm->vtaps + 1))
8732                         latency_tolerant_lines = 1;
8733                 else
8734                         latency_tolerant_lines = 2;
8735         }
8736
8737         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8738
8739         if (dce8_latency_watermark(wm) <= latency_hiding)
8740                 return true;
8741         else
8742                 return false;
8743 }
8744
8745 /**
8746  * dce8_program_watermarks - program display watermarks
8747  *
8748  * @rdev: radeon_device pointer
8749  * @radeon_crtc: the selected display controller
8750  * @lb_size: line buffer size
8751  * @num_heads: number of display controllers in use
8752  *
8753  * Calculate and program the display watermarks for the
8754  * selected display controller (CIK).
8755  */
8756 static void dce8_program_watermarks(struct radeon_device *rdev,
8757                                     struct radeon_crtc *radeon_crtc,
8758                                     u32 lb_size, u32 num_heads)
8759 {
8760         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8761         struct dce8_wm_params wm_low, wm_high;
8762         u32 pixel_period;
8763         u32 line_time = 0;
8764         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8765         u32 tmp, wm_mask;
8766
8767         if (radeon_crtc->base.enabled && num_heads && mode) {
8768                 pixel_period = 1000000 / (u32)mode->clock;
8769                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8770
8771                 /* watermark for high clocks */
8772                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8773                     rdev->pm.dpm_enabled) {
8774                         wm_high.yclk =
8775                                 radeon_dpm_get_mclk(rdev, false) * 10;
8776                         wm_high.sclk =
8777                                 radeon_dpm_get_sclk(rdev, false) * 10;
8778                 } else {
8779                         wm_high.yclk = rdev->pm.current_mclk * 10;
8780                         wm_high.sclk = rdev->pm.current_sclk * 10;
8781                 }
8782
8783                 wm_high.disp_clk = mode->clock;
8784                 wm_high.src_width = mode->crtc_hdisplay;
8785                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8786                 wm_high.blank_time = line_time - wm_high.active_time;
8787                 wm_high.interlaced = false;
8788                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8789                         wm_high.interlaced = true;
8790                 wm_high.vsc = radeon_crtc->vsc;
8791                 wm_high.vtaps = 1;
8792                 if (radeon_crtc->rmx_type != RMX_OFF)
8793                         wm_high.vtaps = 2;
8794                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8795                 wm_high.lb_size = lb_size;
8796                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8797                 wm_high.num_heads = num_heads;
8798
8799                 /* set for high clocks */
8800                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8801
8802                 /* possibly force display priority to high */
8803                 /* should really do this at mode validation time... */
8804                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8805                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8806                     !dce8_check_latency_hiding(&wm_high) ||
8807                     (rdev->disp_priority == 2)) {
8808                         DRM_DEBUG_KMS("force priority to high\n");
8809                 }
8810
8811                 /* watermark for low clocks */
8812                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8813                     rdev->pm.dpm_enabled) {
8814                         wm_low.yclk =
8815                                 radeon_dpm_get_mclk(rdev, true) * 10;
8816                         wm_low.sclk =
8817                                 radeon_dpm_get_sclk(rdev, true) * 10;
8818                 } else {
8819                         wm_low.yclk = rdev->pm.current_mclk * 10;
8820                         wm_low.sclk = rdev->pm.current_sclk * 10;
8821                 }
8822
8823                 wm_low.disp_clk = mode->clock;
8824                 wm_low.src_width = mode->crtc_hdisplay;
8825                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8826                 wm_low.blank_time = line_time - wm_low.active_time;
8827                 wm_low.interlaced = false;
8828                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8829                         wm_low.interlaced = true;
8830                 wm_low.vsc = radeon_crtc->vsc;
8831                 wm_low.vtaps = 1;
8832                 if (radeon_crtc->rmx_type != RMX_OFF)
8833                         wm_low.vtaps = 2;
8834                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8835                 wm_low.lb_size = lb_size;
8836                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8837                 wm_low.num_heads = num_heads;
8838
8839                 /* set for low clocks */
8840                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8841
8842                 /* possibly force display priority to high */
8843                 /* should really do this at mode validation time... */
8844                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8845                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8846                     !dce8_check_latency_hiding(&wm_low) ||
8847                     (rdev->disp_priority == 2)) {
8848                         DRM_DEBUG_KMS("force priority to high\n");
8849                 }
8850         }
8851
8852         /* select wm A */
8853         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8854         tmp = wm_mask;
8855         tmp &= ~LATENCY_WATERMARK_MASK(3);
8856         tmp |= LATENCY_WATERMARK_MASK(1);
8857         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8858         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8859                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8860                 LATENCY_HIGH_WATERMARK(line_time)));
8861         /* select wm B */
8862         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8863         tmp &= ~LATENCY_WATERMARK_MASK(3);
8864         tmp |= LATENCY_WATERMARK_MASK(2);
8865         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8866         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8867                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8868                 LATENCY_HIGH_WATERMARK(line_time)));
8869         /* restore original selection */
8870         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8871
8872         /* save values for DPM */
8873         radeon_crtc->line_time = line_time;
8874         radeon_crtc->wm_high = latency_watermark_a;
8875         radeon_crtc->wm_low = latency_watermark_b;
8876 }
8877
8878 /**
8879  * dce8_bandwidth_update - program display watermarks
8880  *
8881  * @rdev: radeon_device pointer
8882  *
8883  * Calculate and program the display watermarks and line
8884  * buffer allocation (CIK).
8885  */
8886 void dce8_bandwidth_update(struct radeon_device *rdev)
8887 {
8888         struct drm_display_mode *mode = NULL;
8889         u32 num_heads = 0, lb_size;
8890         int i;
8891
8892         radeon_update_display_priority(rdev);
8893
8894         for (i = 0; i < rdev->num_crtc; i++) {
8895                 if (rdev->mode_info.crtcs[i]->base.enabled)
8896                         num_heads++;
8897         }
8898         for (i = 0; i < rdev->num_crtc; i++) {
8899                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8900                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8901                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8902         }
8903 }
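
/*
 * Illustrative sketch (hypothetical helper): the first loop in
 * dce8_bandwidth_update() only counts enabled CRTCs; that count feeds the
 * num_heads field used by the watermark calculation above, presumably so
 * the available memory bandwidth can be split across active displays.
 * On its own it is just:
 */
static u32 dce8_count_enabled_heads(struct radeon_device *rdev)
{
        u32 num_heads = 0;
        int i;

        for (i = 0; i < rdev->num_crtc; i++) {
                if (rdev->mode_info.crtcs[i]->base.enabled)
                        num_heads++;
        }
        return num_heads;
}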
8904
8905 /**
8906  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8907  *
8908  * @rdev: radeon_device pointer
8909  *
8910  * Fetches a GPU clock counter snapshot (CIK).
8911  * Returns the 64 bit clock counter snapshot.
8912  */
8913 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8914 {
8915         uint64_t clock;
8916
8917         mutex_lock(&rdev->gpu_clock_mutex);
8918         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8919         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8920                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8921         mutex_unlock(&rdev->gpu_clock_mutex);
8922         return clock;
8923 }
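
/*
 * Illustrative usage sketch (hypothetical, not part of the driver): the
 * counter is latched by the RLC_CAPTURE_GPU_CLOCK_COUNT write and read back
 * as an LSB/MSB pair under gpu_clock_mutex, so two snapshots can simply be
 * subtracted, e.g. to measure elapsed GPU clocks across a CPU-side delay:
 */
static u64 cik_gpu_clock_delta(struct radeon_device *rdev, unsigned int msecs)
{
        u64 start = cik_get_gpu_clock_counter(rdev);

        msleep(msecs);  /* assumes <linux/delay.h> is pulled in, as for mdelay() */
        return cik_get_gpu_clock_counter(rdev) - start;
}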
8924
8925 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8926                               u32 cntl_reg, u32 status_reg)
8927 {
8928         int r, i;
8929         struct atom_clock_dividers dividers;
8930         uint32_t tmp;
8931
8932         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8933                                            clock, false, &dividers);
8934         if (r)
8935                 return r;
8936
8937         tmp = RREG32_SMC(cntl_reg);
8938         tmp &= ~(DCLK_DIR_CNTL_EN | DCLK_DIVIDER_MASK);
8939         tmp |= dividers.post_divider;
8940         WREG32_SMC(cntl_reg, tmp);
8941
8942         for (i = 0; i < 100; i++) {
8943                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8944                         break;
8945                 mdelay(10);
8946         }
8947         if (i == 100)
8948                 return -ETIMEDOUT;
8949
8950         return 0;
8951 }
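
/*
 * Illustrative sketch (hypothetical helper): the poll loop above, and the
 * matching loops in cik_set_vce_clocks() below, simply wait up to about one
 * second (100 iterations x 10 ms) for a status bit in an SMC register:
 */
static int cik_wait_smc_status(struct radeon_device *rdev,
                               u32 status_reg, u32 status_mask)
{
        int i;

        for (i = 0; i < 100; i++) {
                if (RREG32_SMC(status_reg) & status_mask)
                        return 0;
                mdelay(10);
        }
        return -ETIMEDOUT;
}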
8952
8953 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8954 {
8955         int r = 0;
8956
8957         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8958         if (r)
8959                 return r;
8960
8961         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8962         return r;
8963 }
8964
8965 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
8966 {
8967         int r, i;
8968         struct atom_clock_dividers dividers;
8969         u32 tmp;
8970
8971         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8972                                            ecclk, false, &dividers);
8973         if (r)
8974                 return r;
8975
8976         for (i = 0; i < 100; i++) {
8977                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8978                         break;
8979                 mdelay(10);
8980         }
8981         if (i == 100)
8982                 return -ETIMEDOUT;
8983
8984         tmp = RREG32_SMC(CG_ECLK_CNTL);
8985         tmp &= ~(ECLK_DIR_CNTL_EN | ECLK_DIVIDER_MASK);
8986         tmp |= dividers.post_divider;
8987         WREG32_SMC(CG_ECLK_CNTL, tmp);
8988
8989         for (i = 0; i < 100; i++) {
8990                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8991                         break;
8992                 mdelay(10);
8993         }
8994         if (i == 100)
8995                 return -ETIMEDOUT;
8996
8997         return 0;
8998 }
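
/*
 * Illustrative usage sketch (hypothetical wrapper, clock values left to the
 * caller): both setters return 0 on success or a negative errno such as
 * -ETIMEDOUT, so callers just propagate the result:
 */
static int cik_example_set_encode_clocks(struct radeon_device *rdev,
                                         u32 vclk, u32 dclk,
                                         u32 evclk, u32 ecclk)
{
        int r;

        r = cik_set_uvd_clocks(rdev, vclk, dclk);
        if (r)
                return r;

        return cik_set_vce_clocks(rdev, evclk, ecclk);
}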
8999
9000 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9001 {
9002         struct pci_dev *root = rdev->pdev->bus->self;
9003         int bridge_pos, gpu_pos;
9004         u32 speed_cntl, mask, current_data_rate;
9005         int ret, i;
9006         u16 tmp16;
9007
9008         if (radeon_pcie_gen2 == 0)
9009                 return;
9010
9011         if (rdev->flags & RADEON_IS_IGP)
9012                 return;
9013
9014         if (!(rdev->flags & RADEON_IS_PCIE))
9015                 return;
9016
9017         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9018         if (ret != 0)
9019                 return;
9020
9021         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9022                 return;
9023
9024         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9025         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9026                 LC_CURRENT_DATA_RATE_SHIFT;
9027         if (mask & DRM_PCIE_SPEED_80) {
9028                 if (current_data_rate == 2) {
9029                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9030                         return;
9031                 }
9032                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9033         } else if (mask & DRM_PCIE_SPEED_50) {
9034                 if (current_data_rate == 1) {
9035                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9036                         return;
9037                 }
9038                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9039         }
9040
9041         bridge_pos = pci_pcie_cap(root);
9042         if (!bridge_pos)
9043                 return;
9044
9045         gpu_pos = pci_pcie_cap(rdev->pdev);
9046         if (!gpu_pos)
9047                 return;
9048
9049         if (mask & DRM_PCIE_SPEED_80) {
9050                 /* re-try equalization if gen3 is not already enabled */
9051                 if (current_data_rate != 2) {
9052                         u16 bridge_cfg, gpu_cfg;
9053                         u16 bridge_cfg2, gpu_cfg2;
9054                         u32 max_lw, current_lw, tmp;
9055
9056                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9057                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9058
9059                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9060                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9061
9062                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9063                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9064
9065                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9066                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9067                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9068
9069                         if (current_lw < max_lw) {
9070                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9071                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9072                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9073                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9074                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9075                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9076                                 }
9077                         }
9078
9079                         for (i = 0; i < 10; i++) {
9080                                 /* check status */
9081                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9082                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9083                                         break;
9084
9085                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9086                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9087
9088                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9089                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9090
9091                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9092                                 tmp |= LC_SET_QUIESCE;
9093                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9094
9095                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9096                                 tmp |= LC_REDO_EQ;
9097                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9098
9099                                 mdelay(100);
9100
9101                                 /* linkctl */
9102                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9103                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9104                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9105                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9106
9107                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9108                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9109                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9110                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9111
9112                                 /* linkctl2 */
9113                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9114                                 tmp16 &= ~((1 << 4) | (7 << 9));
9115                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9116                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9117
9118                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9119                                 tmp16 &= ~((1 << 4) | (7 << 9));
9120                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9121                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9122
9123                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9124                                 tmp &= ~LC_SET_QUIESCE;
9125                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9126                         }
9127                 }
9128         }
9129
9130         /* set the link speed */
9131         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9132         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9133         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9134
9135         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9136         tmp16 &= ~0xf;
9137         if (mask & DRM_PCIE_SPEED_80)
9138                 tmp16 |= 3; /* gen3 */
9139         else if (mask & DRM_PCIE_SPEED_50)
9140                 tmp16 |= 2; /* gen2 */
9141         else
9142                 tmp16 |= 1; /* gen1 */
9143         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9144
9145         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9146         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9147         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9148
9149         for (i = 0; i < rdev->usec_timeout; i++) {
9150                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9151                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9152                         break;
9153                 udelay(1);
9154         }
9155 }
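
/*
 * Illustrative sketch (hypothetical helper): the PCI_EXP_LNKCTL2 update at
 * the end of cik_pcie_gen3_enable() writes the target link speed field
 * (bits 3:0) with the same encoding the speed-cap checks above rely on:
 */
static u16 cik_target_link_speed(u32 mask)
{
        if (mask & DRM_PCIE_SPEED_80)
                return 3;       /* 8.0 GT/s, gen3 */
        if (mask & DRM_PCIE_SPEED_50)
                return 2;       /* 5.0 GT/s, gen2 */
        return 1;               /* 2.5 GT/s, gen1 */
}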
9156
9157 static void cik_program_aspm(struct radeon_device *rdev)
9158 {
9159         u32 data, orig;
9160         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9161         bool disable_clkreq = false;
9162
9163         if (radeon_aspm == 0)
9164                 return;
9165
9166         /* XXX double check IGPs */
9167         if (rdev->flags & RADEON_IS_IGP)
9168                 return;
9169
9170         if (!(rdev->flags & RADEON_IS_PCIE))
9171                 return;
9172
9173         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9174         data &= ~LC_XMIT_N_FTS_MASK;
9175         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9176         if (orig != data)
9177                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9178
9179         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9180         data |= LC_GO_TO_RECOVERY;
9181         if (orig != data)
9182                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9183
9184         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9185         data |= P_IGNORE_EDB_ERR;
9186         if (orig != data)
9187                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9188
9189         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9190         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9191         data |= LC_PMI_TO_L1_DIS;
9192         if (!disable_l0s)
9193                 data |= LC_L0S_INACTIVITY(7);
9194
9195         if (!disable_l1) {
9196                 data |= LC_L1_INACTIVITY(7);
9197                 data &= ~LC_PMI_TO_L1_DIS;
9198                 if (orig != data)
9199                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9200
9201                 if (!disable_plloff_in_l1) {
9202                         bool clk_req_support;
9203
9204                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9205                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9206                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9207                         if (orig != data)
9208                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9209
9210                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9211                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9212                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9213                         if (orig != data)
9214                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9215
9216                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9217                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9218                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9219                         if (orig != data)
9220                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9221
9222                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9223                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9224                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9225                         if (orig != data)
9226                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9227
9228                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9229                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9230                         data |= LC_DYN_LANES_PWR_STATE(3);
9231                         if (orig != data)
9232                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9233
9234                         if (!disable_clkreq) {
9235                                 struct pci_dev *root = rdev->pdev->bus->self;
9236                                 u32 lnkcap;
9237
9238                                 clk_req_support = false;
9239                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9240                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9241                                         clk_req_support = true;
9242                         } else {
9243                                 clk_req_support = false;
9244                         }
9245
9246                         if (clk_req_support) {
9247                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9248                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9249                                 if (orig != data)
9250                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9251
9252                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9253                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9254                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9255                                 if (orig != data)
9256                                         WREG32_SMC(THM_CLK_CNTL, data);
9257
9258                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9259                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9260                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9261                                 if (orig != data)
9262                                         WREG32_SMC(MISC_CLK_CTRL, data);
9263
9264                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9265                                 data &= ~BCLK_AS_XCLK;
9266                                 if (orig != data)
9267                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9268
9269                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9270                                 data &= ~FORCE_BIF_REFCLK_EN;
9271                                 if (orig != data)
9272                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9273
9274                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9275                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9276                                 data |= MPLL_CLKOUT_SEL(4);
9277                                 if (orig != data)
9278                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9279                         }
9280                 }
9281         } else {
9282                 if (orig != data)
9283                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9284         }
9285
9286         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9287         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9288         if (orig != data)
9289                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9290
9291         if (!disable_l0s) {
9292                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9293                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9294                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9295                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9296                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9297                                 data &= ~LC_L0S_INACTIVITY_MASK;
9298                                 if (orig != data)
9299                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9300                         }
9301                 }
9302         }
9303 }
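
/*
 * Illustrative sketch (hypothetical helper): most register updates in
 * cik_program_aspm() follow the same read-modify-conditional-write pattern,
 * only issuing the write when the value actually changes:
 */
static void cik_rmw_pcie_port(struct radeon_device *rdev,
                              u32 reg, u32 clear, u32 set)
{
        u32 orig, data;

        orig = data = RREG32_PCIE_PORT(reg);
        data &= ~clear;
        data |= set;
        if (orig != data)
                WREG32_PCIE_PORT(reg, data);
}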