/* drivers/gpu/drm/radeon/cik.c */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
/*
 * Microcode images this driver may request at runtime for the CIK
 * ASICs handled in this file.  Note that only the BONAIRE set lists
 * MC and SMC images; the KAVERI/KABINI sets do not.
 */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
/*
 * Helpers implemented in other radeon translation units (r600, evergreen,
 * sumo, si and cik_sdma code).  NOTE(review): these are declared here
 * instead of in a shared header — presumably historical; confirm before
 * moving them.
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
/* Forward declarations for functions defined later in this file. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
82
83 /* get temperature in millidegrees */
84 int ci_get_temp(struct radeon_device *rdev)
85 {
86         u32 temp;
87         int actual_temp = 0;
88
89         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
90                 CTF_TEMP_SHIFT;
91
92         if (temp & 0x200)
93                 actual_temp = 255;
94         else
95                 actual_temp = temp & 0x1ff;
96
97         actual_temp = actual_temp * 1000;
98
99         return actual_temp;
100 }
101
102 /* get temperature in millidegrees */
103 int kv_get_temp(struct radeon_device *rdev)
104 {
105         u32 temp;
106         int actual_temp = 0;
107
108         temp = RREG32_SMC(0xC0300E0C);
109
110         if (temp)
111                 actual_temp = (temp / 8) - 49;
112         else
113                 actual_temp = 0;
114
115         actual_temp = actual_temp * 1000;
116
117         return actual_temp;
118 }
119
120 /*
121  * Indirect registers accessor
122  */
123 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
124 {
125         unsigned long flags;
126         u32 r;
127
128         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
129         WREG32(PCIE_INDEX, reg);
130         (void)RREG32(PCIE_INDEX);
131         r = RREG32(PCIE_DATA);
132         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
133         return r;
134 }
135
/*
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon device handle
 * @reg: indirect register offset
 * @v: value to write
 *
 * Selects @reg via PCIE_INDEX and writes @v through PCIE_DATA, holding
 * pciep_idx_lock so the index/data pair cannot be interleaved by a
 * concurrent accessor.  The statement order is load-bearing.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* read back to post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
147
/*
 * RLC save/restore register list for Spectre (Kaveri-class) parts.
 *
 * NOTE(review): each entry appears to encode ((instance/broadcast select
 * << 16) | (register byte offset >> 2)) and is followed by a zero
 * placeholder slot; the bare 0x3 and 0x5 words look like element counts
 * for the groups that follow them — confirm against the RLC microcode
 * save/restore list format before editing.  Values are hardware data and
 * must not be "tidied".
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xae00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc778 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc77c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc780 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc784 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc788 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc78c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xae00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	/* final group of 5 entries carries no placeholder slots */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
594
/*
 * RLC save/restore register list for Kalindi (Kabini-class) parts.
 *
 * Same layout as spectre_rlc_save_restore_register_list above, with a
 * smaller set of per-instance entries (selects 0x4e00..0x7e00 only).
 * NOTE(review): entry encoding presumed to be ((instance/broadcast
 * select << 16) | (register byte offset >> 2)) plus a zero placeholder;
 * 0x3/0x5 presumed group counts — confirm against the RLC microcode
 * format.  Values are hardware data and must not be "tidied".
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	/* final group of 5 entries carries no placeholder slots */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
919
/*
 * Bonaire SPM "golden" fixups, consumed by cik_init_golden_registers().
 * Three u32s per entry: register offset, mask, value; the exact apply
 * semantics live in radeon_program_register_sequence().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
924
/*
 * Bonaire common "golden" fixups ({offset, mask, value} triplets),
 * applied via radeon_program_register_sequence() at init time.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
932
/*
 * Bonaire per-ASIC "golden" register settings ({offset, mask, value}
 * triplets), applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().  Values come from AMD hardware bring-up;
 * do not modify without a matching register-spec reference.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
977
/*
 * Bonaire clock-gating init sequence ({offset, mask, value} triplets),
 * applied via radeon_program_register_sequence().  Per the name this
 * programs MGCG/CGCG (medium-/coarse-grain clock gating) state — confirm
 * against the CIK register spec before editing.  The long 0x3c020..0x3c0a8
 * run writes a repeating pattern into a per-block table.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1063
/*
 * Spectre (Kaveri) SPM "golden" fixups ({offset, mask, value} triplets),
 * applied via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1068
/*
 * Spectre (Kaveri) common "golden" fixups ({offset, mask, value} triplets).
 * Identical values to the Bonaire/Kalindi common tables.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1076
/*
 * Spectre (Kaveri) per-ASIC "golden" register settings ({offset, mask,
 * value} triplets), applied via radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	/* NOTE(review): 0x28355 is not dword-aligned, unlike every other
	 * offset in these tables — verify against the register spec. */
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1105
/*
 * Spectre (Kaveri) clock-gating init sequence ({offset, mask, value}
 * triplets), applied via radeon_program_register_sequence().  Differs from
 * the Bonaire table mainly in the 0x3c2cx values and an extended
 * 0x3c0ac..0x3c0bc tail.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1196
/*
 * Kalindi (Kabini) SPM "golden" fixups ({offset, mask, value} triplets),
 * applied via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1201
/*
 * Kalindi (Kabini) common "golden" fixups ({offset, mask, value} triplets).
 * Identical values to the Bonaire/Spectre common tables.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1209
/*
 * Kalindi (Kabini) per-ASIC "golden" register settings ({offset, mask,
 * value} triplets), applied via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1243
/*
 * Kalindi (Kabini) clock-gating init sequence ({offset, mask, value}
 * triplets), applied via radeon_program_register_sequence().  Shorter
 * 0x3c020..0x3c044 table than Bonaire/Spectre (fewer instances on the APU).
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1302
1303 static void cik_init_golden_registers(struct radeon_device *rdev)
1304 {
1305         switch (rdev->family) {
1306         case CHIP_BONAIRE:
1307                 radeon_program_register_sequence(rdev,
1308                                                  bonaire_mgcg_cgcg_init,
1309                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1310                 radeon_program_register_sequence(rdev,
1311                                                  bonaire_golden_registers,
1312                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1313                 radeon_program_register_sequence(rdev,
1314                                                  bonaire_golden_common_registers,
1315                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1316                 radeon_program_register_sequence(rdev,
1317                                                  bonaire_golden_spm_registers,
1318                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1319                 break;
1320         case CHIP_KABINI:
1321                 radeon_program_register_sequence(rdev,
1322                                                  kalindi_mgcg_cgcg_init,
1323                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1324                 radeon_program_register_sequence(rdev,
1325                                                  kalindi_golden_registers,
1326                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1327                 radeon_program_register_sequence(rdev,
1328                                                  kalindi_golden_common_registers,
1329                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1330                 radeon_program_register_sequence(rdev,
1331                                                  kalindi_golden_spm_registers,
1332                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1333                 break;
1334         case CHIP_KAVERI:
1335                 radeon_program_register_sequence(rdev,
1336                                                  spectre_mgcg_cgcg_init,
1337                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1338                 radeon_program_register_sequence(rdev,
1339                                                  spectre_golden_registers,
1340                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1341                 radeon_program_register_sequence(rdev,
1342                                                  spectre_golden_common_registers,
1343                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1344                 radeon_program_register_sequence(rdev,
1345                                                  spectre_golden_spm_registers,
1346                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1347                 break;
1348         default:
1349                 break;
1350         }
1351 }
1352
1353 /**
1354  * cik_get_xclk - get the xclk
1355  *
1356  * @rdev: radeon_device pointer
1357  *
1358  * Returns the reference clock used by the gfx engine
1359  * (CIK).
1360  */
1361 u32 cik_get_xclk(struct radeon_device *rdev)
1362 {
1363         u32 reference_clock = rdev->clock.spll.reference_freq;
1364
1365         if (rdev->flags & RADEON_IS_IGP) {
1366                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1367                         return reference_clock / 2;
1368         } else {
1369                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1370                         return reference_clock / 4;
1371         }
1372         return reference_clock;
1373 }
1374
1375 /**
1376  * cik_mm_rdoorbell - read a doorbell dword
1377  *
1378  * @rdev: radeon_device pointer
1379  * @offset: byte offset into the aperture
1380  *
1381  * Returns the value in the doorbell aperture at the
1382  * requested offset (CIK).
1383  */
1384 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1385 {
1386         if (offset < rdev->doorbell.size) {
1387                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1388         } else {
1389                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1390                 return 0;
1391         }
1392 }
1393
1394 /**
1395  * cik_mm_wdoorbell - write a doorbell dword
1396  *
1397  * @rdev: radeon_device pointer
1398  * @offset: byte offset into the aperture
1399  * @v: value to write
1400  *
1401  * Writes @v to the doorbell aperture at the
1402  * requested offset (CIK).
1403  */
1404 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1405 {
1406         if (offset < rdev->doorbell.size) {
1407                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1408         } else {
1409                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1410         }
1411 }
1412
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire MC IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode itself is uploaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1454
1455 /**
1456  * cik_srbm_select - select specific register instances
1457  *
1458  * @rdev: radeon_device pointer
1459  * @me: selected ME (micro engine)
1460  * @pipe: pipe
1461  * @queue: queue
1462  * @vmid: VMID
1463  *
1464  * Switches the currently active registers instances.  Some
1465  * registers are instanced per VMID, others are instanced per
1466  * me/pipe/queue combination.
1467  */
1468 static void cik_srbm_select(struct radeon_device *rdev,
1469                             u32 me, u32 pipe, u32 queue, u32 vmid)
1470 {
1471         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1472                              MEID(me & 0x3) |
1473                              VMID(vmid & 0xf) |
1474                              QUEUEID(queue & 0x7));
1475         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1476 }
1477
1478 /* ucode loading */
1479 /**
1480  * ci_mc_load_microcode - load MC ucode into the hw
1481  *
1482  * @rdev: radeon_device pointer
1483  *
1484  * Load the GDDR MC ucode into the hw (CIK).
1485  * Returns 0 on success, error on failure.
1486  */
1487 static int ci_mc_load_microcode(struct radeon_device *rdev)
1488 {
1489         const __be32 *fw_data;
1490         u32 running, blackout = 0;
1491         u32 *io_mc_regs;
1492         int i, ucode_size, regs_size;
1493
1494         if (!rdev->mc_fw)
1495                 return -EINVAL;
1496
1497         switch (rdev->family) {
1498         case CHIP_BONAIRE:
1499         default:
1500                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1501                 ucode_size = CIK_MC_UCODE_SIZE;
1502                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1503                 break;
1504         }
1505
1506         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1507
1508         if (running == 0) {
1509                 if (running) {
1510                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1511                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1512                 }
1513
1514                 /* reset the engine and set to writable */
1515                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1516                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1517
1518                 /* load mc io regs */
1519                 for (i = 0; i < regs_size; i++) {
1520                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1521                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1522                 }
1523                 /* load the MC ucode */
1524                 fw_data = (const __be32 *)rdev->mc_fw->data;
1525                 for (i = 0; i < ucode_size; i++)
1526                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1527
1528                 /* put the engine back into the active state */
1529                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1530                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1532
1533                 /* wait for training to complete */
1534                 for (i = 0; i < rdev->usec_timeout; i++) {
1535                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1536                                 break;
1537                         udelay(1);
1538                 }
1539                 for (i = 0; i < rdev->usec_timeout; i++) {
1540                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1541                                 break;
1542                         udelay(1);
1543                 }
1544
1545                 if (running)
1546                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1547         }
1548
1549         return 0;
1550 }
1551
1552 /**
1553  * cik_init_microcode - load ucode images from disk
1554  *
1555  * @rdev: radeon_device pointer
1556  *
1557  * Use the firmware interface to load the ucode images into
1558  * the driver (not loaded into hw).
1559  * Returns 0 on success, error on failure.
1560  */
1561 static int cik_init_microcode(struct radeon_device *rdev)
1562 {
1563         const char *chip_name;
1564         size_t pfp_req_size, me_req_size, ce_req_size,
1565                 mec_req_size, rlc_req_size, mc_req_size,
1566                 sdma_req_size, smc_req_size;
1567         char fw_name[30];
1568         int err;
1569
1570         DRM_DEBUG("\n");
1571
1572         switch (rdev->family) {
1573         case CHIP_BONAIRE:
1574                 chip_name = "BONAIRE";
1575                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1576                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1577                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1578                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1579                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1580                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1581                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1582                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1583                 break;
1584         case CHIP_KAVERI:
1585                 chip_name = "KAVERI";
1586                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1588                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1591                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1592                 break;
1593         case CHIP_KABINI:
1594                 chip_name = "KABINI";
1595                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1596                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1597                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1598                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1599                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1600                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1601                 break;
1602         default: BUG();
1603         }
1604
1605         DRM_INFO("Loading %s Microcode\n", chip_name);
1606
1607         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1608         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1609         if (err)
1610                 goto out;
1611         if (rdev->pfp_fw->size != pfp_req_size) {
1612                 printk(KERN_ERR
1613                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1614                        rdev->pfp_fw->size, fw_name);
1615                 err = -EINVAL;
1616                 goto out;
1617         }
1618
1619         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1620         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1621         if (err)
1622                 goto out;
1623         if (rdev->me_fw->size != me_req_size) {
1624                 printk(KERN_ERR
1625                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1626                        rdev->me_fw->size, fw_name);
1627                 err = -EINVAL;
1628         }
1629
1630         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1631         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1632         if (err)
1633                 goto out;
1634         if (rdev->ce_fw->size != ce_req_size) {
1635                 printk(KERN_ERR
1636                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1637                        rdev->ce_fw->size, fw_name);
1638                 err = -EINVAL;
1639         }
1640
1641         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1642         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1643         if (err)
1644                 goto out;
1645         if (rdev->mec_fw->size != mec_req_size) {
1646                 printk(KERN_ERR
1647                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1648                        rdev->mec_fw->size, fw_name);
1649                 err = -EINVAL;
1650         }
1651
1652         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1653         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1654         if (err)
1655                 goto out;
1656         if (rdev->rlc_fw->size != rlc_req_size) {
1657                 printk(KERN_ERR
1658                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1659                        rdev->rlc_fw->size, fw_name);
1660                 err = -EINVAL;
1661         }
1662
1663         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1664         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1665         if (err)
1666                 goto out;
1667         if (rdev->sdma_fw->size != sdma_req_size) {
1668                 printk(KERN_ERR
1669                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1670                        rdev->sdma_fw->size, fw_name);
1671                 err = -EINVAL;
1672         }
1673
1674         /* No SMC, MC ucode on APUs */
1675         if (!(rdev->flags & RADEON_IS_IGP)) {
1676                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1677                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1678                 if (err)
1679                         goto out;
1680                 if (rdev->mc_fw->size != mc_req_size) {
1681                         printk(KERN_ERR
1682                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1683                                rdev->mc_fw->size, fw_name);
1684                         err = -EINVAL;
1685                 }
1686
1687                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1688                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1689                 if (err) {
1690                         printk(KERN_ERR
1691                                "smc: error loading firmware \"%s\"\n",
1692                                fw_name);
1693                         release_firmware(rdev->smc_fw);
1694                         rdev->smc_fw = NULL;
1695                 } else if (rdev->smc_fw->size != smc_req_size) {
1696                         printk(KERN_ERR
1697                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1698                                rdev->smc_fw->size, fw_name);
1699                         err = -EINVAL;
1700                 }
1701         }
1702
1703 out:
1704         if (err) {
1705                 if (err != -EINVAL)
1706                         printk(KERN_ERR
1707                                "cik_cp: Failed to load firmware \"%s\"\n",
1708                                fw_name);
1709                 release_firmware(rdev->pfp_fw);
1710                 rdev->pfp_fw = NULL;
1711                 release_firmware(rdev->me_fw);
1712                 rdev->me_fw = NULL;
1713                 release_firmware(rdev->ce_fw);
1714                 rdev->ce_fw = NULL;
1715                 release_firmware(rdev->rlc_fw);
1716                 rdev->rlc_fw = NULL;
1717                 release_firmware(rdev->mc_fw);
1718                 rdev->mc_fw = NULL;
1719                 release_firmware(rdev->smc_fw);
1720                 rdev->smc_fw = NULL;
1721         }
1722         return err;
1723 }
1724
1725 /*
1726  * Core functions
1727  */
1728 /**
1729  * cik_tiling_mode_table_init - init the hw tiling table
1730  *
1731  * @rdev: radeon_device pointer
1732  *
1733  * Starting with SI, the tiling setup is done globally in a
1734  * set of 32 tiling modes.  Rather than selecting each set of
1735  * parameters per surface as on older asics, a surface simply
1736  * selects an index into the global tiling table and uses the
1737  * parameters stored at that entry (CIK).
1738  */
1739 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1740 {
1741         const u32 num_tile_mode_states = 32;
1742         const u32 num_secondary_tile_mode_states = 16;
1743         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1744         u32 num_pipe_configs;
1745         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1746                 rdev->config.cik.max_shader_engines;
1747
1748         switch (rdev->config.cik.mem_row_size_in_kb) {
1749         case 1:
1750                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1751                 break;
1752         case 2:
1753         default:
1754                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1755                 break;
1756         case 4:
1757                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1758                 break;
1759         }
1760
1761         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1762         if (num_pipe_configs > 8)
1763                 num_pipe_configs = 8; /* ??? */
1764
1765         if (num_pipe_configs == 8) {
1766                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767                         switch (reg_offset) {
1768                         case 0:
1769                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1771                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1772                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1773                                 break;
1774                         case 1:
1775                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1776                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1777                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1778                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1779                                 break;
1780                         case 2:
1781                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1782                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1783                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1784                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1785                                 break;
1786                         case 3:
1787                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1788                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1789                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1790                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1791                                 break;
1792                         case 4:
1793                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1794                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1795                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1796                                                  TILE_SPLIT(split_equal_to_row_size));
1797                                 break;
1798                         case 5:
1799                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1800                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1801                                 break;
1802                         case 6:
1803                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1804                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1805                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1806                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1807                                 break;
1808                         case 7:
1809                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1810                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1811                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1812                                                  TILE_SPLIT(split_equal_to_row_size));
1813                                 break;
1814                         case 8:
1815                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1816                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1817                                 break;
1818                         case 9:
1819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1820                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1821                                 break;
1822                         case 10:
1823                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1824                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1825                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1826                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1827                                 break;
1828                         case 11:
1829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1830                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1831                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1833                                 break;
1834                         case 12:
1835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1836                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1837                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1838                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1839                                 break;
1840                         case 13:
1841                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1842                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1843                                 break;
1844                         case 14:
1845                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1847                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1848                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1849                                 break;
1850                         case 16:
1851                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1852                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1853                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1854                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1855                                 break;
1856                         case 17:
1857                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1858                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1859                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1860                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1861                                 break;
1862                         case 27:
1863                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1864                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1865                                 break;
1866                         case 28:
1867                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1868                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1869                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1870                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871                                 break;
1872                         case 29:
1873                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1875                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1876                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1877                                 break;
1878                         case 30:
1879                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1880                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1881                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1882                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883                                 break;
1884                         default:
1885                                 gb_tile_moden = 0;
1886                                 break;
1887                         }
1888                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1889                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1890                 }
1891                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1892                         switch (reg_offset) {
1893                         case 0:
1894                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1896                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1897                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1898                                 break;
1899                         case 1:
1900                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1901                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1902                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1903                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1904                                 break;
1905                         case 2:
1906                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1907                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1908                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1909                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1910                                 break;
1911                         case 3:
1912                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1913                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1914                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1915                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1916                                 break;
1917                         case 4:
1918                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1919                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1920                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1921                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1922                                 break;
1923                         case 5:
1924                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1926                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1927                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1928                                 break;
1929                         case 6:
1930                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1931                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1932                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1933                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1934                                 break;
1935                         case 8:
1936                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1937                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1938                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1939                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1940                                 break;
1941                         case 9:
1942                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1943                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1944                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1945                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1946                                 break;
1947                         case 10:
1948                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1949                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1950                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1951                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1952                                 break;
1953                         case 11:
1954                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1955                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1956                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1957                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1958                                 break;
1959                         case 12:
1960                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1962                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1963                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1964                                 break;
1965                         case 13:
1966                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1967                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1968                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1969                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1970                                 break;
1971                         case 14:
1972                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1974                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1975                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1976                                 break;
1977                         default:
1978                                 gb_tile_moden = 0;
1979                                 break;
1980                         }
1981                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1982                 }
1983         } else if (num_pipe_configs == 4) {
1984                 if (num_rbs == 4) {
1985                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1986                                 switch (reg_offset) {
1987                                 case 0:
1988                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1989                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1990                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1991                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1992                                         break;
1993                                 case 1:
1994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1996                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1997                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1998                                         break;
1999                                 case 2:
2000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2003                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2004                                         break;
2005                                 case 3:
2006                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2009                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2010                                         break;
2011                                 case 4:
2012                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2015                                                          TILE_SPLIT(split_equal_to_row_size));
2016                                         break;
2017                                 case 5:
2018                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2019                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2020                                         break;
2021                                 case 6:
2022                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2023                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2024                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2025                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2026                                         break;
2027                                 case 7:
2028                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2029                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2030                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2031                                                          TILE_SPLIT(split_equal_to_row_size));
2032                                         break;
2033                                 case 8:
2034                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2035                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2036                                         break;
2037                                 case 9:
2038                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2039                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2040                                         break;
2041                                 case 10:
2042                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2044                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2045                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2046                                         break;
2047                                 case 11:
2048                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2049                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2050                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2051                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2052                                         break;
2053                                 case 12:
2054                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2055                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2057                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                         break;
2059                                 case 13:
2060                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2062                                         break;
2063                                 case 14:
2064                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2066                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2068                                         break;
2069                                 case 16:
2070                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2071                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2072                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2073                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074                                         break;
2075                                 case 17:
2076                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2077                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2079                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                         break;
2081                                 case 27:
2082                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2084                                         break;
2085                                 case 28:
2086                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2087                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2088                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2089                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                                         break;
2091                                 case 29:
2092                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2093                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2094                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2095                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096                                         break;
2097                                 case 30:
2098                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2099                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2101                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                         break;
2103                                 default:
2104                                         gb_tile_moden = 0;
2105                                         break;
2106                                 }
2107                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2108                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2109                         }
2110                 } else if (num_rbs < 4) {
2111                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2112                                 switch (reg_offset) {
2113                                 case 0:
2114                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2116                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2117                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2118                                         break;
2119                                 case 1:
2120                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2122                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2124                                         break;
2125                                 case 2:
2126                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2128                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2129                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2130                                         break;
2131                                 case 3:
2132                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2134                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2135                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2136                                         break;
2137                                 case 4:
2138                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2140                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2141                                                          TILE_SPLIT(split_equal_to_row_size));
2142                                         break;
2143                                 case 5:
2144                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2145                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                                         break;
2147                                 case 6:
2148                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2149                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2150                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2151                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2152                                         break;
2153                                 case 7:
2154                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2155                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2156                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2157                                                          TILE_SPLIT(split_equal_to_row_size));
2158                                         break;
2159                                 case 8:
2160                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2161                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2162                                         break;
2163                                 case 9:
2164                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2166                                         break;
2167                                 case 10:
2168                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2170                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2171                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                                         break;
2173                                 case 11:
2174                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2177                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178                                         break;
2179                                 case 12:
2180                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2181                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2182                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                                         break;
2185                                 case 13:
2186                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2188                                         break;
2189                                 case 14:
2190                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2193                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194                                         break;
2195                                 case 16:
2196                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2197                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2199                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200                                         break;
2201                                 case 17:
2202                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2203                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2205                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206                                         break;
2207                                 case 27:
2208                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2210                                         break;
2211                                 case 28:
2212                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2213                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2214                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2215                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216                                         break;
2217                                 case 29:
2218                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2219                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2220                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2221                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222                                         break;
2223                                 case 30:
2224                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2225                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2226                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2227                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228                                         break;
2229                                 default:
2230                                         gb_tile_moden = 0;
2231                                         break;
2232                                 }
2233                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2234                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2235                         }
2236                 }
2237                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2238                         switch (reg_offset) {
2239                         case 0:
2240                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2243                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2244                                 break;
2245                         case 1:
2246                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2248                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2250                                 break;
2251                         case 2:
2252                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2254                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2256                                 break;
2257                         case 3:
2258                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2262                                 break;
2263                         case 4:
2264                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2268                                 break;
2269                         case 5:
2270                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2274                                 break;
2275                         case 6:
2276                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2279                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2280                                 break;
2281                         case 8:
2282                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2284                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2286                                 break;
2287                         case 9:
2288                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2292                                 break;
2293                         case 10:
2294                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2298                                 break;
2299                         case 11:
2300                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2304                                 break;
2305                         case 12:
2306                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2309                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2310                                 break;
2311                         case 13:
2312                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2313                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2314                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2315                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2316                                 break;
2317                         case 14:
2318                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2321                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2322                                 break;
2323                         default:
2324                                 gb_tile_moden = 0;
2325                                 break;
2326                         }
2327                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2328                 }
2329         } else if (num_pipe_configs == 2) {
2330                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2331                         switch (reg_offset) {
2332                         case 0:
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2337                                 break;
2338                         case 1:
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2342                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2343                                 break;
2344                         case 2:
2345                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2347                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2348                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2349                                 break;
2350                         case 3:
2351                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2354                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2355                                 break;
2356                         case 4:
2357                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2360                                                  TILE_SPLIT(split_equal_to_row_size));
2361                                 break;
2362                         case 5:
2363                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2364                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2365                                 break;
2366                         case 6:
2367                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2370                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2371                                 break;
2372                         case 7:
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2376                                                  TILE_SPLIT(split_equal_to_row_size));
2377                                 break;
2378                         case 8:
2379                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2380                                 break;
2381                         case 9:
2382                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2384                                 break;
2385                         case 10:
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2389                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390                                 break;
2391                         case 11:
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2395                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396                                 break;
2397                         case 12:
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2401                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402                                 break;
2403                         case 13:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2406                                 break;
2407                         case 14:
2408                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2411                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412                                 break;
2413                         case 16:
2414                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2417                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418                                 break;
2419                         case 17:
2420                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2421                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2423                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424                                 break;
2425                         case 27:
2426                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2428                                 break;
2429                         case 28:
2430                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2433                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                                 break;
2435                         case 29:
2436                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2439                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                                 break;
2441                         case 30:
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2445                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446                                 break;
2447                         default:
2448                                 gb_tile_moden = 0;
2449                                 break;
2450                         }
2451                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2452                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2453                 }
2454                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2455                         switch (reg_offset) {
2456                         case 0:
2457                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2460                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2461                                 break;
2462                         case 1:
2463                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2464                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2466                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2467                                 break;
2468                         case 2:
2469                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2471                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2472                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2473                                 break;
2474                         case 3:
2475                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2478                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2479                                 break;
2480                         case 4:
2481                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2484                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2485                                 break;
2486                         case 5:
2487                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2490                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2491                                 break;
2492                         case 6:
2493                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2497                                 break;
2498                         case 8:
2499                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2500                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2502                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2503                                 break;
2504                         case 9:
2505                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2506                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2509                                 break;
2510                         case 10:
2511                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2512                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2515                                 break;
2516                         case 11:
2517                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2518                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2520                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2521                                 break;
2522                         case 12:
2523                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2525                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2527                                 break;
2528                         case 13:
2529                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2531                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2532                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2533                                 break;
2534                         case 14:
2535                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2537                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2538                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2539                                 break;
2540                         default:
2541                                 gb_tile_moden = 0;
2542                                 break;
2543                         }
2544                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2545                 }
2546         } else
2547                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2548 }
2549
2550 /**
2551  * cik_select_se_sh - select which SE, SH to address
2552  *
2553  * @rdev: radeon_device pointer
2554  * @se_num: shader engine to address
2555  * @sh_num: sh block to address
2556  *
2557  * Select which SE, SH combinations to address. Certain
2558  * registers are instanced per SE or SH.  0xffffffff means
2559  * broadcast to all SEs or SHs (CIK).
2560  */
2561 static void cik_select_se_sh(struct radeon_device *rdev,
2562                              u32 se_num, u32 sh_num)
2563 {
2564         u32 data = INSTANCE_BROADCAST_WRITES;
2565
2566         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2567                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2568         else if (se_num == 0xffffffff)
2569                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2570         else if (sh_num == 0xffffffff)
2571                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2572         else
2573                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2574         WREG32(GRBM_GFX_INDEX, data);
2575 }
2576
2577 /**
2578  * cik_create_bitmask - create a bitmask
2579  *
2580  * @bit_width: length of the mask
2581  *
2582  * create a variable length bit mask (CIK).
2583  * Returns the bitmask.
2584  */
2585 static u32 cik_create_bitmask(u32 bit_width)
2586 {
2587         u32 i, mask = 0;
2588
2589         for (i = 0; i < bit_width; i++) {
2590                 mask <<= 1;
2591                 mask |= 1;
2592         }
2593         return mask;
2594 }
2595
2596 /**
 * cik_get_rb_disabled - get the mask of disabled RBs
2598  *
2599  * @rdev: radeon_device pointer
2600  * @max_rb_num: max RBs (render backends) for the asic
2601  * @se_num: number of SEs (shader engines) for the asic
2602  * @sh_per_se: number of SH blocks per SE for the asic
2603  *
2604  * Calculates the bitmask of disabled RBs (CIK).
2605  * Returns the disabled RB bitmask.
2606  */
2607 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2608                               u32 max_rb_num, u32 se_num,
2609                               u32 sh_per_se)
2610 {
2611         u32 data, mask;
2612
2613         data = RREG32(CC_RB_BACKEND_DISABLE);
2614         if (data & 1)
2615                 data &= BACKEND_DISABLE_MASK;
2616         else
2617                 data = 0;
2618         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2619
2620         data >>= BACKEND_DISABLE_SHIFT;
2621
2622         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2623
2624         return data & mask;
2625 }
2626
2627 /**
2628  * cik_setup_rb - setup the RBs on the asic
2629  *
2630  * @rdev: radeon_device pointer
2631  * @se_num: number of SEs (shader engines) for the asic
2632  * @sh_per_se: number of SH blocks per SE for the asic
2633  * @max_rb_num: max RBs (render backends) for the asic
2634  *
2635  * Configures per-SE/SH RB registers (CIK).
2636  */
2637 static void cik_setup_rb(struct radeon_device *rdev,
2638                          u32 se_num, u32 sh_per_se,
2639                          u32 max_rb_num)
2640 {
2641         int i, j;
2642         u32 data, mask;
2643         u32 disabled_rbs = 0;
2644         u32 enabled_rbs = 0;
2645
2646         for (i = 0; i < se_num; i++) {
2647                 for (j = 0; j < sh_per_se; j++) {
2648                         cik_select_se_sh(rdev, i, j);
2649                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2650                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2651                 }
2652         }
2653         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2654
2655         mask = 1;
2656         for (i = 0; i < max_rb_num; i++) {
2657                 if (!(disabled_rbs & mask))
2658                         enabled_rbs |= mask;
2659                 mask <<= 1;
2660         }
2661
2662         for (i = 0; i < se_num; i++) {
2663                 cik_select_se_sh(rdev, i, 0xffffffff);
2664                 data = 0;
2665                 for (j = 0; j < sh_per_se; j++) {
2666                         switch (enabled_rbs & 3) {
2667                         case 1:
2668                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2669                                 break;
2670                         case 2:
2671                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2672                                 break;
2673                         case 3:
2674                         default:
2675                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2676                                 break;
2677                         }
2678                         enabled_rbs >>= 2;
2679                 }
2680                 WREG32(PA_SC_RASTER_CONFIG, data);
2681         }
2682         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2683 }
2684
2685 /**
2686  * cik_gpu_init - setup the 3D engine
2687  *
2688  * @rdev: radeon_device pointer
2689  *
2690  * Configures the 3D engine and tiling configuration
2691  * registers so that the 3D engine is usable.
2692  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* fill in the per-asic shader engine / pipe / cache limits and
	 * pick the golden GB_ADDR_CONFIG value for the family
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri ships in several CU/RB configurations; key the
		 * counts off the PCI device id
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size from the MC column count, clamped to 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into the blocks that do their own
	 * address decoding (HDP, display DMIF, SDMA engines, UVD)
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* note: the last argument is the per-SE RB count */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* size the scan converter FIFOs from the per-asic values above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no changes; presumably just latches the
	 * register -- TODO confirm intent
	 */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the register writes settle */
	udelay(50);
}
2931
2932 /*
2933  * GPU scratch registers helpers function.
2934  */
2935 /**
2936  * cik_scratch_init - setup driver info for CP scratch regs
2937  *
2938  * @rdev: radeon_device pointer
2939  *
2940  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
2942  * is not used by default on newer asics (r6xx+).  On newer asics,
2943  * memory buffers are used for fences rather than scratch regs.
2944  */
2945 static void cik_scratch_init(struct radeon_device *rdev)
2946 {
2947         int i;
2948
2949         rdev->scratch.num_reg = 7;
2950         rdev->scratch.reg_base = SCRATCH_REG0;
2951         for (i = 0; i < rdev->scratch.num_reg; i++) {
2952                 rdev->scratch.free[i] = true;
2953                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2954         }
2955 }
2956
2957 /**
2958  * cik_ring_test - basic gfx ring test
2959  *
2960  * @rdev: radeon_device pointer
2961  * @ring: radeon_ring structure holding ring information
2962  *
2963  * Allocate a scratch register and write to it using the gfx ring (CIK).
2964  * Provides a basic gfx ring test to verify that the ring is working.
2965  * Used by cik_cp_gfx_resume();
2966  * Returns 0 on success, error on failure.
2967  */
2968 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2969 {
2970         uint32_t scratch;
2971         uint32_t tmp = 0;
2972         unsigned i;
2973         int r;
2974
2975         r = radeon_scratch_get(rdev, &scratch);
2976         if (r) {
2977                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2978                 return r;
2979         }
2980         WREG32(scratch, 0xCAFEDEAD);
2981         r = radeon_ring_lock(rdev, ring, 3);
2982         if (r) {
2983                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2984                 radeon_scratch_free(rdev, scratch);
2985                 return r;
2986         }
2987         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2988         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2989         radeon_ring_write(ring, 0xDEADBEEF);
2990         radeon_ring_unlock_commit(rdev, ring);
2991
2992         for (i = 0; i < rdev->usec_timeout; i++) {
2993                 tmp = RREG32(scratch);
2994                 if (tmp == 0xDEADBEEF)
2995                         break;
2996                 DRM_UDELAY(1);
2997         }
2998         if (i < rdev->usec_timeout) {
2999                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3000         } else {
3001                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3002                           ring->idx, scratch, tmp);
3003                 r = -EINVAL;
3004         }
3005         radeon_scratch_free(rdev, scratch);
3006         return r;
3007 }
3008
3009 /**
3010  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3011  *
3012  * @rdev: radeon_device pointer
3013  * @fence: radeon fence object
3014  *
 * Emits a fence sequence number on the gfx ring and flushes
3016  * GPU caches.
3017  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);	/* fence addr lo, dword aligned */
	/* fence addr hi plus the data/interrupt select fields */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);	/* sequence value written at addr */
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3045
3046 /**
3047  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3048  *
3049  * @rdev: radeon_device pointer
3050  * @fence: radeon fence object
3051  *
 * Emits a fence sequence number on the compute ring and flushes
3053  * GPU caches.
3054  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	/* note: unlike the gfx EVENT_WRITE_EOP path, RELEASE_MEM carries
	 * the data/int selects before the address dwords */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);	/* fence addr lo, dword aligned */
	radeon_ring_write(ring, upper_32_bits(addr));	/* fence addr hi */
	radeon_ring_write(ring, fence->seq);	/* sequence value written at addr */
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3083
/**
 * cik_semaphore_ring_emit - emit a semaphore command on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: true to emit a semaphore wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet that waits on or signals the
 * semaphore at semaphore->gpu_addr (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);	/* semaphore addr lo */
	/* addr hi (16 bits) combined with the wait/signal select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3096
3097 /*
3098  * IB stuff
3099  */
3100 /**
3101  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3102  *
3103  * @rdev: radeon_device pointer
3104  * @ib: radeon indirect buffer object
3105  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
3110  * on the gfx ring for execution by the GPU.
3111  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* record the post-IB rptr in the save register:
			 * 3 dwords for this packet + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* record the post-IB rptr via a memory write:
			 * 5 dwords for this packet + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id it runs under (0 if no VM) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB addr lo, dword aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);	/* IB addr hi */
	radeon_ring_write(ring, control);
}
3155
3156 /**
3157  * cik_ib_test - basic gfx ring IB test
3158  *
3159  * @rdev: radeon_device pointer
3160  * @ring: radeon_ring structure holding ring information
3161  *
3162  * Allocate an IB and execute it on the gfx ring (CIK).
3163  * Provides a basic gfx ring test to verify that IBs are working.
3164  * Returns 0 on success, error on failure.
3165  */
3166 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3167 {
3168         struct radeon_ib ib;
3169         uint32_t scratch;
3170         uint32_t tmp = 0;
3171         unsigned i;
3172         int r;
3173
3174         r = radeon_scratch_get(rdev, &scratch);
3175         if (r) {
3176                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3177                 return r;
3178         }
3179         WREG32(scratch, 0xCAFEDEAD);
3180         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3181         if (r) {
3182                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3183                 return r;
3184         }
3185         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3186         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3187         ib.ptr[2] = 0xDEADBEEF;
3188         ib.length_dw = 3;
3189         r = radeon_ib_schedule(rdev, &ib, NULL);
3190         if (r) {
3191                 radeon_scratch_free(rdev, scratch);
3192                 radeon_ib_free(rdev, &ib);
3193                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3194                 return r;
3195         }
3196         r = radeon_fence_wait(ib.fence, false);
3197         if (r) {
3198                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3199                 return r;
3200         }
3201         for (i = 0; i < rdev->usec_timeout; i++) {
3202                 tmp = RREG32(scratch);
3203                 if (tmp == 0xDEADBEEF)
3204                         break;
3205                 DRM_UDELAY(1);
3206         }
3207         if (i < rdev->usec_timeout) {
3208                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3209         } else {
3210                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3211                           scratch, tmp);
3212                 r = -EINVAL;
3213         }
3214         radeon_scratch_free(rdev, scratch);
3215         radeon_ib_free(rdev, &ib);
3216         return r;
3217 }
3218
3219 /*
3220  * CP.
 * On CIK, gfx and compute now have independent command processors.
3222  *
3223  * GFX
3224  * Gfx consists of a single ring and can process both gfx jobs and
3225  * compute jobs.  The gfx CP consists of three microengines (ME):
3226  * PFP - Pre-Fetch Parser
3227  * ME - Micro Engine
3228  * CE - Constant Engine
3229  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3231  * used by the DE so that they can be loaded into cache in parallel
3232  * while the DE is processing state update packets.
3233  *
3234  * Compute
3235  * The compute CP consists of two microengines (ME):
3236  * MEC1 - Compute MicroEngine 1
3237  * MEC2 - Compute MicroEngine 2
3238  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3239  * The queues are exposed to userspace and are programmed directly
3240  * by the compute runtime.
3241  */
3242 /**
3243  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3244  *
3245  * @rdev: radeon_device pointer
3246  * @enable: enable or disable the MEs
3247  *
3248  * Halts or unhalts the gfx MEs.
3249  */
3250 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3251 {
3252         if (enable)
3253                 WREG32(CP_ME_CNTL, 0);
3254         else {
3255                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3256                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3257         }
3258         udelay(50);
3259 }
3260
3261 /**
3262  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3263  *
3264  * @rdev: radeon_device pointer
3265  *
3266  * Loads the gfx PFP, ME, and CE ucode.
3267  * Returns 0 for success, -EINVAL if the ucode is not available.
3268  */
3269 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3270 {
3271         const __be32 *fw_data;
3272         int i;
3273
3274         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3275                 return -EINVAL;
3276
3277         cik_cp_gfx_enable(rdev, false);
3278
3279         /* PFP */
3280         fw_data = (const __be32 *)rdev->pfp_fw->data;
3281         WREG32(CP_PFP_UCODE_ADDR, 0);
3282         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3283                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3284         WREG32(CP_PFP_UCODE_ADDR, 0);
3285
3286         /* CE */
3287         fw_data = (const __be32 *)rdev->ce_fw->data;
3288         WREG32(CP_CE_UCODE_ADDR, 0);
3289         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3290                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3291         WREG32(CP_CE_UCODE_ADDR, 0);
3292
3293         /* ME */
3294         fw_data = (const __be32 *)rdev->me_fw->data;
3295         WREG32(CP_ME_RAM_WADDR, 0);
3296         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3297                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3298         WREG32(CP_ME_RAM_WADDR, 0);
3299
3300         WREG32(CP_PFP_UCODE_ADDR, 0);
3301         WREG32(CP_CE_UCODE_ADDR, 0);
3302         WREG32(CP_ME_RAM_WADDR, 0);
3303         WREG32(CP_ME_RAM_RADDR, 0);
3304         return 0;
3305 }
3306
3307 /**
3308  * cik_cp_gfx_start - start the gfx ring
3309  *
3310  * @rdev: radeon_device pointer
3311  *
3312  * Enables the ring and loads the clear state context and other
3313  * packets required to init the ring.
3314  * Returns 0 for success, error for failure.
3315  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the default state plus 17 dwords of setup packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the default ("clear") register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3367
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx CP before freeing the ring backing store */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3381
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in dwords) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* give the rptr write-enable time to latch before clearing it */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3454
3455 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3456                               struct radeon_ring *ring)
3457 {
3458         u32 rptr;
3459
3460
3461
3462         if (rdev->wb.enabled) {
3463                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3464         } else {
3465                 mutex_lock(&rdev->srbm_mutex);
3466                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3467                 rptr = RREG32(CP_HQD_PQ_RPTR);
3468                 cik_srbm_select(rdev, 0, 0, 0, 0);
3469                 mutex_unlock(&rdev->srbm_mutex);
3470         }
3471
3472         return rptr;
3473 }
3474
3475 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3476                               struct radeon_ring *ring)
3477 {
3478         u32 wptr;
3479
3480         if (rdev->wb.enabled) {
3481                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3482         } else {
3483                 mutex_lock(&rdev->srbm_mutex);
3484                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3485                 wptr = RREG32(CP_HQD_PQ_WPTR);
3486                 cik_srbm_select(rdev, 0, 0, 0, 0);
3487                 mutex_unlock(&rdev->srbm_mutex);
3488         }
3489
3490         return wptr;
3491 }
3492
/* Publish a new write pointer for a compute ring: update the writeback
 * shadow the CP polls, then ring the queue's doorbell to kick the MEC. */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
                               struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3499
3500 /**
3501  * cik_cp_compute_enable - enable/disable the compute CP MEs
3502  *
3503  * @rdev: radeon_device pointer
3504  * @enable: enable or disable the MEs
3505  *
3506  * Halts or unhalts the compute MEs.
3507  */
3508 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3509 {
3510         if (enable)
3511                 WREG32(CP_MEC_CNTL, 0);
3512         else
3513                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3514         udelay(50);
3515 }
3516
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2: only KV has a second MEC; it uses the same
		 * ucode image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
3553
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* unhalt the MECs; the individual queues are programmed later
	 * in cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
3568
/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	/* free the MQD buffer objects of both compute rings */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}
3602
/* Unpin and free the MEC HPD EOP buffer object allocated by
 * cik_mec_init(). Safe to call if the buffer was never allocated. */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
3618
3619 #define MEC_HPD_SIZE 2048
3620
3621 static int cik_mec_init(struct radeon_device *rdev)
3622 {
3623         int r;
3624         u32 *hpd;
3625
3626         /*
3627          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3628          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3629          */
3630         if (rdev->family == CHIP_KAVERI)
3631                 rdev->mec.num_mec = 2;
3632         else
3633                 rdev->mec.num_mec = 1;
3634         rdev->mec.num_pipe = 4;
3635         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3636
3637         if (rdev->mec.hpd_eop_obj == NULL) {
3638                 r = radeon_bo_create(rdev,
3639                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3640                                      PAGE_SIZE, true,
3641                                      RADEON_GEM_DOMAIN_GTT, NULL,
3642                                      &rdev->mec.hpd_eop_obj);
3643                 if (r) {
3644                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3645                         return r;
3646                 }
3647         }
3648
3649         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3650         if (unlikely(r != 0)) {
3651                 cik_mec_fini(rdev);
3652                 return r;
3653         }
3654         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3655                           &rdev->mec.hpd_eop_gpu_addr);
3656         if (r) {
3657                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3658                 cik_mec_fini(rdev);
3659                 return r;
3660         }
3661         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3662         if (r) {
3663                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3664                 cik_mec_fini(rdev);
3665                 return r;
3666         }
3667
3668         /* clear memory.  Not sure if this is required or not */
3669         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3670
3671         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3672         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3673
3674         return 0;
3675 }
3676
/* CPU-side shadow of the per-queue HQD (hardware queue descriptor)
 * register block. Field order mirrors the CP_MQD_*/CP_HQD_* registers
 * programmed in cik_cp_compute_resume(); the saved values live inside
 * struct bonaire_mqd so the CP can restore the queue state. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3715
/* Memory queue descriptor (MQD) for Bonaire-class (CIK) compute queues.
 * This structure lives in a GTT buffer object and is read by the CP;
 * queue_state carries the saved HQD register values for the queue.
 * Layout must match what the MEC firmware expects — do not reorder. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3743
3744 /**
3745  * cik_cp_compute_resume - setup the compute queue registers
3746  *
3747  * @rdev: radeon_device pointer
3748  *
3749  * Program the compute queues and test them to make sure they
3750  * are working.
3751  * Returns 0 for success, error for failure.
3752  */
3753 static int cik_cp_compute_resume(struct radeon_device *rdev)
3754 {
3755         int r, i, idx;
3756         u32 tmp;
3757         bool use_doorbell = true;
3758         u64 hqd_gpu_addr;
3759         u64 mqd_gpu_addr;
3760         u64 eop_gpu_addr;
3761         u64 wb_gpu_addr;
3762         u32 *buf;
3763         struct bonaire_mqd *mqd;
3764
3765         r = cik_cp_compute_start(rdev);
3766         if (r)
3767                 return r;
3768
3769         /* fix up chicken bits */
3770         tmp = RREG32(CP_CPF_DEBUG);
3771         tmp |= (1 << 23);
3772         WREG32(CP_CPF_DEBUG, tmp);
3773
3774         /* init the pipes */
3775         mutex_lock(&rdev->srbm_mutex);
3776         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3777                 int me = (i < 4) ? 1 : 2;
3778                 int pipe = (i < 4) ? i : (i - 4);
3779
3780                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3781
3782                 cik_srbm_select(rdev, me, pipe, 0, 0);
3783
3784                 /* write the EOP addr */
3785                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3786                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3787
3788                 /* set the VMID assigned */
3789                 WREG32(CP_HPD_EOP_VMID, 0);
3790
3791                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3792                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3793                 tmp &= ~EOP_SIZE_MASK;
3794                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3795                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3796         }
3797         cik_srbm_select(rdev, 0, 0, 0, 0);
3798         mutex_unlock(&rdev->srbm_mutex);
3799
3800         /* init the queues.  Just two for now. */
3801         for (i = 0; i < 2; i++) {
3802                 if (i == 0)
3803                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3804                 else
3805                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3806
3807                 if (rdev->ring[idx].mqd_obj == NULL) {
3808                         r = radeon_bo_create(rdev,
3809                                              sizeof(struct bonaire_mqd),
3810                                              PAGE_SIZE, true,
3811                                              RADEON_GEM_DOMAIN_GTT, NULL,
3812                                              &rdev->ring[idx].mqd_obj);
3813                         if (r) {
3814                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3815                                 return r;
3816                         }
3817                 }
3818
3819                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3820                 if (unlikely(r != 0)) {
3821                         cik_cp_compute_fini(rdev);
3822                         return r;
3823                 }
3824                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3825                                   &mqd_gpu_addr);
3826                 if (r) {
3827                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3828                         cik_cp_compute_fini(rdev);
3829                         return r;
3830                 }
3831                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3832                 if (r) {
3833                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3834                         cik_cp_compute_fini(rdev);
3835                         return r;
3836                 }
3837
3838                 /* doorbell offset */
3839                 rdev->ring[idx].doorbell_offset =
3840                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3841
3842                 /* init the mqd struct */
3843                 memset(buf, 0, sizeof(struct bonaire_mqd));
3844
3845                 mqd = (struct bonaire_mqd *)buf;
3846                 mqd->header = 0xC0310800;
3847                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3848                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3849                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3850                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3851
3852                 mutex_lock(&rdev->srbm_mutex);
3853                 cik_srbm_select(rdev, rdev->ring[idx].me,
3854                                 rdev->ring[idx].pipe,
3855                                 rdev->ring[idx].queue, 0);
3856
3857                 /* disable wptr polling */
3858                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3859                 tmp &= ~WPTR_POLL_EN;
3860                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3861
3862                 /* enable doorbell? */
3863                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3864                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3865                 if (use_doorbell)
3866                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3867                 else
3868                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3869                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3870                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3871
3872                 /* disable the queue if it's active */
3873                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3874                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3875                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3876                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3877                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3878                         for (i = 0; i < rdev->usec_timeout; i++) {
3879                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3880                                         break;
3881                                 udelay(1);
3882                         }
3883                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3884                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3885                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3886                 }
3887
3888                 /* set the pointer to the MQD */
3889                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3890                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3891                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3892                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3893                 /* set MQD vmid to 0 */
3894                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3895                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3896                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3897
3898                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3899                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3900                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3901                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3902                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3903                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3904
3905                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3906                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3907                 mqd->queue_state.cp_hqd_pq_control &=
3908                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3909
3910                 mqd->queue_state.cp_hqd_pq_control |=
3911                         order_base_2(rdev->ring[idx].ring_size / 8);
3912                 mqd->queue_state.cp_hqd_pq_control |=
3913                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3914 #ifdef __BIG_ENDIAN
3915                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3916 #endif
3917                 mqd->queue_state.cp_hqd_pq_control &=
3918                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3919                 mqd->queue_state.cp_hqd_pq_control |=
3920                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3921                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3922
3923                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3924                 if (i == 0)
3925                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3926                 else
3927                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3928                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3929                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3930                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3931                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3932                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3933
3934                 /* set the wb address wether it's enabled or not */
3935                 if (i == 0)
3936                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3937                 else
3938                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3939                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3940                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3941                         upper_32_bits(wb_gpu_addr) & 0xffff;
3942                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3943                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3944                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3945                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3946
3947                 /* enable the doorbell if requested */
3948                 if (use_doorbell) {
3949                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3950                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3951                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3952                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3953                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3954                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3955                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3956                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3957
3958                 } else {
3959                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3960                 }
3961                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3962                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3963
3964                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3965                 rdev->ring[idx].wptr = 0;
3966                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3967                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3968                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3969                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3970
3971                 /* set the vmid for the queue */
3972                 mqd->queue_state.cp_hqd_vmid = 0;
3973                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3974
3975                 /* activate the queue */
3976                 mqd->queue_state.cp_hqd_active = 1;
3977                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3978
3979                 cik_srbm_select(rdev, 0, 0, 0, 0);
3980                 mutex_unlock(&rdev->srbm_mutex);
3981
3982                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3983                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3984
3985                 rdev->ring[idx].ready = true;
3986                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3987                 if (r)
3988                         rdev->ring[idx].ready = false;
3989         }
3990
3991         return 0;
3992 }
3993
/* Gate the gfx CP and the compute MECs together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
3999
/* Load the gfx CP ucode first, then the compute MEC ucode; stop at the
 * first failure and propagate its error code. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);
	return r;
}
4013
/* Tear down the gfx ring and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4019
4020 static int cik_cp_resume(struct radeon_device *rdev)
4021 {
4022         int r;
4023
4024         cik_enable_gui_idle_interrupt(rdev, false);
4025
4026         r = cik_cp_load_microcode(rdev);
4027         if (r)
4028                 return r;
4029
4030         r = cik_cp_gfx_resume(rdev);
4031         if (r)
4032                 return r;
4033         r = cik_cp_compute_resume(rdev);
4034         if (r)
4035                 return r;
4036
4037         cik_enable_gui_idle_interrupt(rdev, true);
4038
4039         return 0;
4040 }
4041
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log as a
 * diagnostic aid when the GPU is hung or about to be reset. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4081
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy graphics pipeline block implies a gfx reset */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG: SDMA_IDLE is an idle flag, so busy means unset */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4162
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status and the last VM fault for post-mortem debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the SDMA engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before pulling any resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the generic reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* never soft reset the MC on APUs (IGP) */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, hold briefly, then deassert each reset; the read back
	 * after each write flushes the posted MMIO write */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump status again so before/after can be compared in the log */
	cik_print_gpu_status_regs(rdev);
}
4289
4290 /**
4291  * cik_asic_reset - soft reset GPU
4292  *
4293  * @rdev: radeon_device pointer
4294  *
4295  * Look up which blocks are hung and attempt
4296  * to reset them.
4297  * Returns 0 for success.
4298  */
4299 int cik_asic_reset(struct radeon_device *rdev)
4300 {
4301         u32 reset_mask;
4302
4303         reset_mask = cik_gpu_check_soft_reset(rdev);
4304
4305         if (reset_mask)
4306                 r600_set_bios_scratch_engine_hung(rdev, true);
4307
4308         cik_gpu_soft_reset(rdev, reset_mask);
4309
4310         reset_mask = cik_gpu_check_soft_reset(rdev);
4311
4312         if (!reset_mask)
4313                 r600_set_bios_scratch_engine_hung(rdev, false);
4314
4315         return 0;
4316 }
4317
4318 /**
4319  * cik_gfx_is_lockup - check if the 3D engine is locked up
4320  *
4321  * @rdev: radeon_device pointer
4322  * @ring: radeon_ring structure holding ring information
4323  *
4324  * Check if the 3D engine is locked up (CIK).
4325  * Returns true if the engine is locked, false if not.
4326  */
4327 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4328 {
4329         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4330
4331         if (!(reset_mask & (RADEON_RESET_GFX |
4332                             RADEON_RESET_COMPUTE |
4333                             RADEON_RESET_CP))) {
4334                 radeon_ring_lockup_update(ring);
4335                 return false;
4336         }
4337         /* force CP activities */
4338         radeon_ring_force_activity(rdev, ring);
4339         return radeon_ring_test_lockup(rdev, ring);
4340 }
4341
4342 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: zero 32 register groups of 0x18 bytes each,
	 * starting at 0x2c14 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC clients before reprogramming the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs vram top (bits 31-16) and base (bits 15-0)
	 * in 16MB units (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture: base 0, top == bot (empty range, presumably
	 * disabling AGP -- TODO confirm against register spec) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
4398
4399 /**
4400  * cik_mc_init - initialize the memory controller driver params
4401  *
4402  * @rdev: radeon_device pointer
4403  *
4404  * Look up the amount of vram, vram width, and decide how to place
4405  * vram and gart within the GPU's physical address space (CIK).
4406  * Returns 0 for success.
4407  */
4408 static int cik_mc_init(struct radeon_device *rdev)
4409 {
4410         u32 tmp;
4411         int chansize, numchan;
4412
4413         /* Get VRAM informations */
4414         rdev->mc.vram_is_ddr = true;
4415         tmp = RREG32(MC_ARB_RAMCFG);
4416         if (tmp & CHANSIZE_MASK) {
4417                 chansize = 64;
4418         } else {
4419                 chansize = 32;
4420         }
4421         tmp = RREG32(MC_SHARED_CHMAP);
4422         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4423         case 0:
4424         default:
4425                 numchan = 1;
4426                 break;
4427         case 1:
4428                 numchan = 2;
4429                 break;
4430         case 2:
4431                 numchan = 4;
4432                 break;
4433         case 3:
4434                 numchan = 8;
4435                 break;
4436         case 4:
4437                 numchan = 3;
4438                 break;
4439         case 5:
4440                 numchan = 6;
4441                 break;
4442         case 6:
4443                 numchan = 10;
4444                 break;
4445         case 7:
4446                 numchan = 12;
4447                 break;
4448         case 8:
4449                 numchan = 16;
4450                 break;
4451         }
4452         rdev->mc.vram_width = numchan * chansize;
4453         /* Could aper size report 0 ? */
4454         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4455         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4456         /* size in MB on si */
4457         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4458         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4459         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4460         si_vram_gtt_location(rdev, &rdev->mc);
4461         radeon_update_bandwidth_info(rdev);
4462
4463         return 0;
4464 }
4465
4466 /*
4467  * GART
4468  * VMID 0 is the physical GPU addresses as used by the kernel.
4469  * VMIDs 1-15 are used for userspace clients and are handled
4470  * by the radeon vm/hsa code.
4471  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; 0x1 invalidates
	 * context 0 (the kernel GART context) only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
4487
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's GART mappings, single-level table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): registers not named in cikd.h; purpose unclear,
	 * cleared here -- TODO identify */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 live in two separate register banks;
	 * point them all at the GART table for now */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* two-level page tables; enable interrupt + default action for
	 * every class of protection fault */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	/* Kaveri only: make sure the chub does not bypass the VM */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	/* srbm_mutex serializes cik_srbm_select() users */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* restore VMID 0 selection before dropping the lock */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4623
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: ENABLE_L1_TLB is deliberately left out
	 * (compare the enable path in cik_pcie_gart_enable()) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: same config as the enable path but without
	 * ENABLE_L2_CACHE */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
4651
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK):
 * disables the page tables, releases the table memory,
 * then tears down the common gart state.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4665
4666 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
4679
4680 /*
4681  * vm
4682  * VMID 0 is the physical GPU addresses as used by the kernel.
4683  * VMIDs 1-15 are used for userspace clients and are handled
4684  * by the radeon vm/hsa code.
4685  */
4686 /**
4687  * cik_vm_init - cik vm init callback
4688  *
4689  * @rdev: radeon_device pointer
4690  *
4691  * Inits cik specific vm parameters (number of VMs, base of vram for
4692  * VMIDs 1-15) (CIK).
4693  * Returns 0 for success.
4694  */
4695 int cik_vm_init(struct radeon_device *rdev)
4696 {
4697         /* number of VMs */
4698         rdev->vm_manager.nvm = 16;
4699         /* base offset of vram pages */
4700         if (rdev->flags & RADEON_IS_IGP) {
4701                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4702                 tmp <<= 22;
4703                 rdev->vm_manager.vram_base_offset = tmp;
4704         } else
4705                 rdev->vm_manager.vram_base_offset = 0;
4706
4707         return 0;
4708 }
4709
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do on CIK; intentionally empty.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
4720
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: memory client id (four ASCII characters packed into a u32)
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* unpack the big-endian 4-char client name into a C string */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* NOTE(review): printk lacks an explicit log level (KERN_ERR
	 * would be appropriate for a fault) -- consider adding one */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
4744
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: vm to flush for (may be NULL, in which case this is a nop)
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write the new page directory base for this VMID; VMIDs 0-7
	 * and 8-15 sit in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* first select the target VMID via SRBM_GFX_CNTL */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	/* one WRITE_DATA covers the four consecutive SH_MEM registers */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
4826
/**
 * cik_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK), chosen by
 * the asic's configured page-table ring index.
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			/* 2 header dwords + 2 dwords per 64-bit PTE,
			 * capped at the maximum packet size; leftover
			 * entries get another packet on the next pass */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the
					 * GART and mask to a page address */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					/* invalid entry */
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
4882
4883 /*
4884  * RLC
4885  * The RLC is a multi-purpose microengine that handles a
4886  * variety of functions, the most important of which is
4887  * the interrupt controller.
4888  */
4889 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4890                                           bool enable)
4891 {
4892         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4893
4894         if (enable)
4895                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4896         else
4897                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4898         WREG32(CP_INT_CNTL_RING0, tmp);
4899 }
4900
4901 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4902 {
4903         u32 tmp;
4904
4905         tmp = RREG32(RLC_LB_CNTL);
4906         if (enable)
4907                 tmp |= LOAD_BALANCE_ENABLE;
4908         else
4909                 tmp &= ~LOAD_BALANCE_ENABLE;
4910         WREG32(RLC_LB_CNTL, tmp);
4911 }
4912
/* Wait (bounded by usec_timeout) for the RLC serdes masters to go idle:
 * first the per-CU masters on every SE/SH, then the non-CU masters.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast addressing to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* now wait for the SE/GC/TC0/TC1 non-CU masters */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4937
4938 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4939 {
4940         u32 tmp;
4941
4942         tmp = RREG32(RLC_CNTL);
4943         if (tmp != rlc)
4944                 WREG32(RLC_CNTL, rlc);
4945 }
4946
/* Halt the RLC if it is currently enabled, waiting for its GPM to go
 * idle and the serdes masters to settle.  Returns the original
 * RLC_CNTL value so the caller can restore it via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to report idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4970
/* Request that the RLC enter safe mode.  Posts the ENTER message,
 * then waits (bounded by the usec timeout, not guaranteed) first for
 * the GFX power/clock status bits and then for the RLC to acknowledge
 * by clearing the REQ bit. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the ENTER_RLC_SAFE_MODE message */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear the request bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
4991
4992 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4993 {
4994         u32 tmp;
4995
4996         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4997         WREG32(RLC_GPR_REG2, tmp);
4998 }
4999
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* hard stop: clear RLC_CNTL entirely, including RLC_ENABLE */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* let the serdes masters drain before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5015
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* brief settle delay after enabling */
	udelay(50);
}
5031
5032 /**
5033  * cik_rlc_resume - setup the RLC hw
5034  *
5035  * @rdev: radeon_device pointer
5036  *
5037  * Initialize the RLC registers, load the ucode,
5038  * and start the RLC (CIK).
5039  * Returns 0 for success, -EINVAL if the ucode is not available.
5040  */
5041 static int cik_rlc_resume(struct radeon_device *rdev)
5042 {
5043         u32 i, size, tmp;
5044         const __be32 *fw_data;
5045
5046         if (!rdev->rlc_fw)
5047                 return -EINVAL;
5048
5049         switch (rdev->family) {
5050         case CHIP_BONAIRE:
5051         default:
5052                 size = BONAIRE_RLC_UCODE_SIZE;
5053                 break;
5054         case CHIP_KAVERI:
5055                 size = KV_RLC_UCODE_SIZE;
5056                 break;
5057         case CHIP_KABINI:
5058                 size = KB_RLC_UCODE_SIZE;
5059                 break;
5060         }
5061
5062         cik_rlc_stop(rdev);
5063
5064         /* disable CG */
5065         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5066         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5067
5068         si_rlc_reset(rdev);
5069
5070         cik_init_pg(rdev);
5071
5072         cik_init_cg(rdev);
5073
5074         WREG32(RLC_LB_CNTR_INIT, 0);
5075         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5076
5077         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5078         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5079         WREG32(RLC_LB_PARAMS, 0x00600408);
5080         WREG32(RLC_LB_CNTL, 0x80000004);
5081
5082         WREG32(RLC_MC_CNTL, 0);
5083         WREG32(RLC_UCODE_CNTL, 0);
5084
5085         fw_data = (const __be32 *)rdev->rlc_fw->data;
5086                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5087         for (i = 0; i < size; i++)
5088                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5089         WREG32(RLC_GPM_UCODE_ADDR, 0);
5090
5091         /* XXX - find out what chips support lbpw */
5092         cik_enable_lbpw(rdev, false);
5093
5094         if (rdev->family == CHIP_BONAIRE)
5095                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5096
5097         cik_rlc_start(rdev);
5098
5099         return 0;
5100 }
5101
/* Enable/disable coarse-grain clock gating (CGCG) and clock/light
 * sleep (CGLS) for the GFX block. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC, program the serdes while it is stopped,
		 * then restore the previous RLC_CNTL value */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads with the results
		 * discarded; presumably done for their side effect
		 * (posting/settling) - TODO confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5137
/* Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * plus the related CP/RLC memory light-sleep and CGTS settings gated
 * by the individual cg_flags. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* disable CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5216
/* MC clock-gating control registers iterated by cik_enable_mc_ls()
 * and cik_enable_mc_mgcg(); both apply the same enable bits
 * (MC_LS_ENABLE / MC_CG_ENABLE) to each entry. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5229
5230 static void cik_enable_mc_ls(struct radeon_device *rdev,
5231                              bool enable)
5232 {
5233         int i;
5234         u32 orig, data;
5235
5236         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5237                 orig = data = RREG32(mc_cg_registers[i]);
5238                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5239                         data |= MC_LS_ENABLE;
5240                 else
5241                         data &= ~MC_LS_ENABLE;
5242                 if (data != orig)
5243                         WREG32(mc_cg_registers[i], data);
5244         }
5245 }
5246
5247 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5248                                bool enable)
5249 {
5250         int i;
5251         u32 orig, data;
5252
5253         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5254                 orig = data = RREG32(mc_cg_registers[i]);
5255                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5256                         data |= MC_CG_ENABLE;
5257                 else
5258                         data &= ~MC_CG_ENABLE;
5259                 if (data != orig)
5260                         WREG32(mc_cg_registers[i], data);
5261         }
5262 }
5263
5264 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5265                                  bool enable)
5266 {
5267         u32 orig, data;
5268
5269         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5270                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5271                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5272         } else {
5273                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5274                 data |= 0xff000000;
5275                 if (data != orig)
5276                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5277
5278                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5279                 data |= 0xff000000;
5280                 if (data != orig)
5281                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5282         }
5283 }
5284
5285 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5286                                  bool enable)
5287 {
5288         u32 orig, data;
5289
5290         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5291                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5292                 data |= 0x100;
5293                 if (orig != data)
5294                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5295
5296                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5297                 data |= 0x100;
5298                 if (orig != data)
5299                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5300         } else {
5301                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5302                 data &= ~0x100;
5303                 if (orig != data)
5304                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5305
5306                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5307                 data &= ~0x100;
5308                 if (orig != data)
5309                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5310         }
5311 }
5312
5313 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5314                                 bool enable)
5315 {
5316         u32 orig, data;
5317
5318         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5319                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5320                 data = 0xfff;
5321                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5322
5323                 orig = data = RREG32(UVD_CGC_CTRL);
5324                 data |= DCM;
5325                 if (orig != data)
5326                         WREG32(UVD_CGC_CTRL, data);
5327         } else {
5328                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5329                 data &= ~0xfff;
5330                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5331
5332                 orig = data = RREG32(UVD_CGC_CTRL);
5333                 data &= ~DCM;
5334                 if (orig != data)
5335                         WREG32(UVD_CGC_CTRL, data);
5336         }
5337 }
5338
5339 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5340                                bool enable)
5341 {
5342         u32 orig, data;
5343
5344         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5345
5346         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5347                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5348                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5349         else
5350                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5351                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5352
5353         if (orig != data)
5354                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5355 }
5356
5357 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5358                                 bool enable)
5359 {
5360         u32 orig, data;
5361
5362         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5363
5364         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5365                 data &= ~CLOCK_GATING_DIS;
5366         else
5367                 data |= CLOCK_GATING_DIS;
5368
5369         if (orig != data)
5370                 WREG32(HDP_HOST_PATH_CNTL, data);
5371 }
5372
5373 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5374                               bool enable)
5375 {
5376         u32 orig, data;
5377
5378         orig = data = RREG32(HDP_MEM_POWER_LS);
5379
5380         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5381                 data |= HDP_LS_ENABLE;
5382         else
5383                 data &= ~HDP_LS_ENABLE;
5384
5385         if (orig != data)
5386                 WREG32(HDP_MEM_POWER_LS, data);
5387 }
5388
/* Enable/disable clock gating for the hardware blocks selected by
 * @block (a mask of RADEON_CG_BLOCK_* flags). */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is skipped on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5432
/* Enable clock gating at init: GFX first, then UVD internal CG,
 * then the remaining blocks in one batch. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5447
/* Disable clock gating at teardown, in the reverse order of
 * cik_init_cg(): the non-GFX blocks first, GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5458
5459 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5460                                           bool enable)
5461 {
5462         u32 data, orig;
5463
5464         orig = data = RREG32(RLC_PG_CNTL);
5465         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5466                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5467         else
5468                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5469         if (orig != data)
5470                 WREG32(RLC_PG_CNTL, data);
5471 }
5472
5473 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5474                                           bool enable)
5475 {
5476         u32 data, orig;
5477
5478         orig = data = RREG32(RLC_PG_CNTL);
5479         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5480                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5481         else
5482                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5483         if (orig != data)
5484                 WREG32(RLC_PG_CNTL, data);
5485 }
5486
5487 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5488 {
5489         u32 data, orig;
5490
5491         orig = data = RREG32(RLC_PG_CNTL);
5492         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5493                 data &= ~DISABLE_CP_PG;
5494         else
5495                 data |= DISABLE_CP_PG;
5496         if (orig != data)
5497                 WREG32(RLC_PG_CNTL, data);
5498 }
5499
5500 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5501 {
5502         u32 data, orig;
5503
5504         orig = data = RREG32(RLC_PG_CNTL);
5505         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5506                 data &= ~DISABLE_GDS_PG;
5507         else
5508                 data |= DISABLE_GDS_PG;
5509         if (orig != data)
5510                 WREG32(RLC_PG_CNTL, data);
5511 }
5512
5513 #define CP_ME_TABLE_SIZE    96
5514 #define CP_ME_TABLE_OFFSET  2048
5515 #define CP_MEC_TABLE_OFFSET 4096
5516
5517 void cik_init_cp_pg_table(struct radeon_device *rdev)
5518 {
5519         const __be32 *fw_data;
5520         volatile u32 *dst_ptr;
5521         int me, i, max_me = 4;
5522         u32 bo_offset = 0;
5523         u32 table_offset;
5524
5525         if (rdev->family == CHIP_KAVERI)
5526                 max_me = 5;
5527
5528         if (rdev->rlc.cp_table_ptr == NULL)
5529                 return;
5530
5531         /* write the cp table buffer */
5532         dst_ptr = rdev->rlc.cp_table_ptr;
5533         for (me = 0; me < max_me; me++) {
5534                 if (me == 0) {
5535                         fw_data = (const __be32 *)rdev->ce_fw->data;
5536                         table_offset = CP_ME_TABLE_OFFSET;
5537                 } else if (me == 1) {
5538                         fw_data = (const __be32 *)rdev->pfp_fw->data;
5539                         table_offset = CP_ME_TABLE_OFFSET;
5540                 } else if (me == 2) {
5541                         fw_data = (const __be32 *)rdev->me_fw->data;
5542                         table_offset = CP_ME_TABLE_OFFSET;
5543                 } else {
5544                         fw_data = (const __be32 *)rdev->mec_fw->data;
5545                         table_offset = CP_MEC_TABLE_OFFSET;
5546                 }
5547
5548                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
5549                         dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5550                 }
5551                 bo_offset += CP_ME_TABLE_SIZE;
5552         }
5553 }
5554
/* Enable/disable coarse-grain gfx powergating together with the RLC
 * auto-powergating mechanism. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is discarded; presumably this read
		 * is only wanted for its side effect - TODO confirm */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5584
5585 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5586 {
5587         u32 mask = 0, tmp, tmp1;
5588         int i;
5589
5590         cik_select_se_sh(rdev, se, sh);
5591         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5592         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5593         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5594
5595         tmp &= 0xffff0000;
5596
5597         tmp |= tmp1;
5598         tmp >>= 16;
5599
5600         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5601                 mask <<= 1;
5602                 mask |= 1;
5603         }
5604
5605         return (~tmp) & mask;
5606 }
5607
5608 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5609 {
5610         u32 i, j, k, active_cu_number = 0;
5611         u32 mask, counter, cu_bitmap;
5612         u32 tmp = 0;
5613
5614         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5615                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5616                         mask = 1;
5617                         cu_bitmap = 0;
5618                         counter = 0;
5619                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5620                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5621                                         if (counter < 2)
5622                                                 cu_bitmap |= mask;
5623                                         counter ++;
5624                                 }
5625                                 mask <<= 1;
5626                         }
5627
5628                         active_cu_number += counter;
5629                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5630                 }
5631         }
5632
5633         WREG32(RLC_PG_AO_CU_MASK, tmp);
5634
5635         tmp = RREG32(RLC_MAX_PG_CU);
5636         tmp &= ~MAX_PU_CU_MASK;
5637         tmp |= MAX_PU_CU(active_cu_number);
5638         WREG32(RLC_MAX_PG_CU, tmp);
5639 }
5640
5641 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5642                                        bool enable)
5643 {
5644         u32 data, orig;
5645
5646         orig = data = RREG32(RLC_PG_CNTL);
5647         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5648                 data |= STATIC_PER_CU_PG_ENABLE;
5649         else
5650                 data &= ~STATIC_PER_CU_PG_ENABLE;
5651         if (orig != data)
5652                 WREG32(RLC_PG_CNTL, data);
5653 }
5654
5655 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5656                                         bool enable)
5657 {
5658         u32 data, orig;
5659
5660         orig = data = RREG32(RLC_PG_CNTL);
5661         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5662                 data |= DYN_PER_CU_PG_ENABLE;
5663         else
5664                 data &= ~DYN_PER_CU_PG_ENABLE;
5665         if (orig != data)
5666                 WREG32(RLC_PG_CNTL, data);
5667 }
5668
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch area with the clear-state descriptor and the
 * save/restore register list, then set up the gfx powergating base
 * addresses and timing parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear-state descriptor (hi/lo address + size)
		 * into RLC scratch */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* base addresses are stored shifted right by 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5720
/* Enable/disable all gfx powergating flavors as a unit: coarse-grain
 * PG plus static and dynamic per-CU powergating. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5727
5728 u32 cik_get_csb_size(struct radeon_device *rdev)
5729 {
5730         u32 count = 0;
5731         const struct cs_section_def *sect = NULL;
5732         const struct cs_extent_def *ext = NULL;
5733
5734         if (rdev->rlc.cs_data == NULL)
5735                 return 0;
5736
5737         /* begin clear state */
5738         count += 2;
5739         /* context control state */
5740         count += 3;
5741
5742         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5743                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5744                         if (sect->id == SECT_CONTEXT)
5745                                 count += 2 + ext->reg_count;
5746                         else
5747                                 return 0;
5748                 }
5749         }
5750         /* pa_sc_raster_config/pa_sc_raster_config1 */
5751         count += 4;
5752         /* end clear state */
5753         count += 2;
5754         /* clear state */
5755         count += 2;
5756
5757         return count;
5758 }
5759
/* Emit the clear-state buffer into @buffer: preamble, context control,
 * the context-register extents from rlc.cs_data, per-family raster
 * config, and a trailing CLEAR_STATE packet.  The layout must match
 * what cik_get_csb_size() counts. */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	/* context control */
	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				/* register index is converted to a
				 * packet-relative offset */
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: per-family values */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = 0x16000012;
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KAVERI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KABINI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		buffer[count++] = 0x00000000;
		break;
	}

	/* end clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	/* clear state */
	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5818
/* Enable the powergating features selected by rdev->pg_flags. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
5833
5834 static void cik_fini_pg(struct radeon_device *rdev)
5835 {
5836         if (rdev->pg_flags) {
5837                 cik_update_gfx_pg(rdev, false);
5838                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5839                         cik_enable_cp_pg(rdev, false);
5840                         cik_enable_gds_pg(rdev, false);
5841                 }
5842         }
5843 }
5844
5845 /*
5846  * Interrupts
5847  * Starting with r6xx, interrupts are handled via a ring buffer.
5848  * Ring buffers are areas of GPU accessible memory that the GPU
5849  * writes interrupt vectors into and the host reads vectors out of.
5850  * There is a rptr (read pointer) that determines where the
5851  * host is currently reading, and a wptr (write pointer)
5852  * which determines where the GPU has written.  When the
5853  * pointers are equal, the ring is idle.  When the GPU
5854  * writes vectors to the ring buffer, it increments the
5855  * wptr.  When there is an interrupt, the host then starts
5856  * fetching commands and processing them until the pointers are
5857  * equal again at which point it updates the rptr.
5858  */
5859
5860 /**
5861  * cik_enable_interrupts - Enable the interrupt ring buffer
5862  *
5863  * @rdev: radeon_device pointer
5864  *
5865  * Enable the interrupt ring buffer (CIK).
5866  */
5867 static void cik_enable_interrupts(struct radeon_device *rdev)
5868 {
5869         u32 ih_cntl = RREG32(IH_CNTL);
5870         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5871
5872         ih_cntl |= ENABLE_INTR;
5873         ih_rb_cntl |= IH_RB_ENABLE;
5874         WREG32(IH_CNTL, ih_cntl);
5875         WREG32(IH_RB_CNTL, ih_rb_cntl);
5876         rdev->ih.enabled = true;
5877 }
5878
5879 /**
5880  * cik_disable_interrupts - Disable the interrupt ring buffer
5881  *
5882  * @rdev: radeon_device pointer
5883  *
5884  * Disable the interrupt ring buffer (CIK).
5885  */
5886 static void cik_disable_interrupts(struct radeon_device *rdev)
5887 {
5888         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5889         u32 ih_cntl = RREG32(IH_CNTL);
5890
5891         ih_rb_cntl &= ~IH_RB_ENABLE;
5892         ih_cntl &= ~ENABLE_INTR;
5893         WREG32(IH_RB_CNTL, ih_rb_cntl);
5894         WREG32(IH_CNTL, ih_cntl);
5895         /* set rptr, wptr to 0 */
5896         WREG32(IH_RB_RPTR, 0);
5897         WREG32(IH_RB_WPTR, 0);
5898         rdev->ih.enabled = false;
5899         rdev->ih.rptr = 0;
5900 }
5901
5902 /**
5903  * cik_disable_interrupt_state - Disable all interrupt sources
5904  *
5905  * @rdev: radeon_device pointer
5906  *
5907  * Clear all interrupt enable bits used by the driver (CIK).
5908  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* gfx ring: keep only the context busy/empty enables, clear the rest */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        /* sdma: clear the trap enable on both SDMA engines */
        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* compute queues: clear all int enables on both MEs, all 4 pipes each */
        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
        /* grbm */
        WREG32(GRBM_INT_CNTL, 0);
        /* vline/vblank, etc. — first two CRTCs always exist on CIK */
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* dac hotplug */
        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

        /* digital hotplug: preserve the configured polarity bit but drop the
         * interrupt enable on each of the 6 HPD pins
         */
        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD1_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD2_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD3_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD4_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD5_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD6_INT_CONTROL, tmp);

}
5963
5964 /**
5965  * cik_irq_init - init and enable the interrupt ring
5966  *
5967  * @rdev: radeon_device pointer
5968  *
5969  * Allocate a ring buffer for the interrupt controller,
5970  * enable the RLC, disable interrupts, enable the IH
5971  * ring buffer and enable it (CIK).
 * Called at device load and resume.
5973  * Returns 0 for success, errors for failure.
5974  */
static int cik_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs while we reprogram the IH */
        cik_disable_interrupts(rdev);

        /* init rlc; on failure, tear down the ring we just allocated */
        ret = cik_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* XXX this should actually be a bus address, not an MC address. same on older asics */
        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        /* ring base is a 256-byte-aligned MC address */
        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* log2 of the ring size in dwords; programmed into IH_RB_CNTL bits [5:1] */
        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        cik_disable_interrupt_state(rdev);

        /* make sure bus mastering is on before enabling interrupts */
        pci_set_master(rdev->pdev);

        /* enable irqs */
        cik_enable_interrupts(rdev);

        return ret;
}
6045
6046 /**
6047  * cik_irq_set - enable/disable interrupt sources
6048  *
6049  * @rdev: radeon_device pointer
6050  *
6051  * Enable interrupt sources on the GPU (vblanks, hpd,
6052  * etc.) (CIK).
6053  * Returns 0 for success, errors for failure.
6054  */
6055 int cik_irq_set(struct radeon_device *rdev)
6056 {
6057         u32 cp_int_cntl;
6058         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6059         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6060         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6061         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6062         u32 grbm_int_cntl = 0;
6063         u32 dma_cntl, dma_cntl1;
6064         u32 thermal_int;
6065
6066         if (!rdev->irq.installed) {
6067                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6068                 return -EINVAL;
6069         }
6070         /* don't enable anything if the ih is disabled */
6071         if (!rdev->ih.enabled) {
6072                 cik_disable_interrupts(rdev);
6073                 /* force the active interrupt state to all disabled */
6074                 cik_disable_interrupt_state(rdev);
6075                 return 0;
6076         }
6077
6078         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6079                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6080         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6081
6082         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6083         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6084         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6085         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6086         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6087         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6088
6089         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6090         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6091
6092         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6093         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6094         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6095         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6096         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6097         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6098         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6100
6101         if (rdev->flags & RADEON_IS_IGP)
6102                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6103                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6104         else
6105                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6106                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6107
6108         /* enable CP interrupts on all rings */
6109         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6110                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6111                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6112         }
6113         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6114                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6115                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6116                 if (ring->me == 1) {
6117                         switch (ring->pipe) {
6118                         case 0:
6119                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6120                                 break;
6121                         case 1:
6122                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6123                                 break;
6124                         case 2:
6125                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6126                                 break;
6127                         case 3:
6128                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6129                                 break;
6130                         default:
6131                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6132                                 break;
6133                         }
6134                 } else if (ring->me == 2) {
6135                         switch (ring->pipe) {
6136                         case 0:
6137                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6138                                 break;
6139                         case 1:
6140                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6141                                 break;
6142                         case 2:
6143                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6144                                 break;
6145                         case 3:
6146                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6147                                 break;
6148                         default:
6149                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6150                                 break;
6151                         }
6152                 } else {
6153                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6154                 }
6155         }
6156         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6157                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6158                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6159                 if (ring->me == 1) {
6160                         switch (ring->pipe) {
6161                         case 0:
6162                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6163                                 break;
6164                         case 1:
6165                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6166                                 break;
6167                         case 2:
6168                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6169                                 break;
6170                         case 3:
6171                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6172                                 break;
6173                         default:
6174                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6175                                 break;
6176                         }
6177                 } else if (ring->me == 2) {
6178                         switch (ring->pipe) {
6179                         case 0:
6180                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6181                                 break;
6182                         case 1:
6183                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6184                                 break;
6185                         case 2:
6186                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6187                                 break;
6188                         case 3:
6189                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6190                                 break;
6191                         default:
6192                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6193                                 break;
6194                         }
6195                 } else {
6196                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6197                 }
6198         }
6199
6200         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6201                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6202                 dma_cntl |= TRAP_ENABLE;
6203         }
6204
6205         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6206                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6207                 dma_cntl1 |= TRAP_ENABLE;
6208         }
6209
6210         if (rdev->irq.crtc_vblank_int[0] ||
6211             atomic_read(&rdev->irq.pflip[0])) {
6212                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6213                 crtc1 |= VBLANK_INTERRUPT_MASK;
6214         }
6215         if (rdev->irq.crtc_vblank_int[1] ||
6216             atomic_read(&rdev->irq.pflip[1])) {
6217                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6218                 crtc2 |= VBLANK_INTERRUPT_MASK;
6219         }
6220         if (rdev->irq.crtc_vblank_int[2] ||
6221             atomic_read(&rdev->irq.pflip[2])) {
6222                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6223                 crtc3 |= VBLANK_INTERRUPT_MASK;
6224         }
6225         if (rdev->irq.crtc_vblank_int[3] ||
6226             atomic_read(&rdev->irq.pflip[3])) {
6227                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6228                 crtc4 |= VBLANK_INTERRUPT_MASK;
6229         }
6230         if (rdev->irq.crtc_vblank_int[4] ||
6231             atomic_read(&rdev->irq.pflip[4])) {
6232                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6233                 crtc5 |= VBLANK_INTERRUPT_MASK;
6234         }
6235         if (rdev->irq.crtc_vblank_int[5] ||
6236             atomic_read(&rdev->irq.pflip[5])) {
6237                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6238                 crtc6 |= VBLANK_INTERRUPT_MASK;
6239         }
6240         if (rdev->irq.hpd[0]) {
6241                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6242                 hpd1 |= DC_HPDx_INT_EN;
6243         }
6244         if (rdev->irq.hpd[1]) {
6245                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6246                 hpd2 |= DC_HPDx_INT_EN;
6247         }
6248         if (rdev->irq.hpd[2]) {
6249                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6250                 hpd3 |= DC_HPDx_INT_EN;
6251         }
6252         if (rdev->irq.hpd[3]) {
6253                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6254                 hpd4 |= DC_HPDx_INT_EN;
6255         }
6256         if (rdev->irq.hpd[4]) {
6257                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6258                 hpd5 |= DC_HPDx_INT_EN;
6259         }
6260         if (rdev->irq.hpd[5]) {
6261                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6262                 hpd6 |= DC_HPDx_INT_EN;
6263         }
6264
6265         if (rdev->irq.dpm_thermal) {
6266                 DRM_DEBUG("dpm thermal\n");
6267                 if (rdev->flags & RADEON_IS_IGP)
6268                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6269                 else
6270                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6271         }
6272
6273         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6274
6275         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6276         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6277
6278         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6279         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6280         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6281         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6282         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6283         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6284         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6285         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6286
6287         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6288
6289         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6290         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6291         if (rdev->num_crtc >= 4) {
6292                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6293                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6294         }
6295         if (rdev->num_crtc >= 6) {
6296                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6297                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6298         }
6299
6300         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6301         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6302         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6303         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6304         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6305         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6306
6307         if (rdev->flags & RADEON_IS_IGP)
6308                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6309         else
6310                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6311
6312         return 0;
6313 }
6314
6315 /**
6316  * cik_irq_ack - ack interrupt sources
6317  *
6318  * @rdev: radeon_device pointer
6319  *
6320  * Ack interrupt sources on the GPU (vblanks, hpd,
6321  * etc.) (CIK).  Certain interrupts sources are sw
6322  * generated and do not require an explicit ack.
6323  */
6324 static inline void cik_irq_ack(struct radeon_device *rdev)
6325 {
6326         u32 tmp;
6327
6328         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6329         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6330         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6331         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6332         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6333         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6334         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6335
6336         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6337                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6338         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6339                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6340         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6341                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6342         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6343                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6344
6345         if (rdev->num_crtc >= 4) {
6346                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6347                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6348                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6349                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6350                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6351                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6352                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6353                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6354         }
6355
6356         if (rdev->num_crtc >= 6) {
6357                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6358                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6359                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6360                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6361                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6362                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6363                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6364                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6365         }
6366
6367         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6368                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6369                 tmp |= DC_HPDx_INT_ACK;
6370                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6371         }
6372         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6373                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6374                 tmp |= DC_HPDx_INT_ACK;
6375                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6376         }
6377         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6378                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6379                 tmp |= DC_HPDx_INT_ACK;
6380                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6381         }
6382         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6383                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6384                 tmp |= DC_HPDx_INT_ACK;
6385                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6386         }
6387         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6388                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6389                 tmp |= DC_HPDx_INT_ACK;
6390                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6391         }
6392         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6393                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6394                 tmp |= DC_HPDx_INT_ACK;
6395                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6396         }
6397 }
6398
6399 /**
6400  * cik_irq_disable - disable interrupts
6401  *
6402  * @rdev: radeon_device pointer
6403  *
6404  * Disable interrupts on the hw (CIK).
6405  */
static void cik_irq_disable(struct radeon_device *rdev)
{
        cik_disable_interrupts(rdev);
        /* Wait and acknowledge irq */
        mdelay(1);
        cik_irq_ack(rdev);
        /* then clear every individual interrupt source enable */
        cik_disable_interrupt_state(rdev);
}
6414
6415 /**
 * cik_irq_suspend - disable interrupts for suspend
6417  *
6418  * @rdev: radeon_device pointer
6419  *
6420  * Disable interrupts and stop the RLC (CIK).
6421  * Used for suspend.
6422  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
        /* disable all interrupt sources and the IH ring */
        cik_irq_disable(rdev);
        /* stop the RLC as well so nothing fires while suspended */
        cik_rlc_stop(rdev);
}
6428
6429 /**
6430  * cik_irq_fini - tear down interrupt support
6431  *
6432  * @rdev: radeon_device pointer
6433  *
6434  * Disable interrupts on the hw and free the IH ring
6435  * buffer (CIK).
6436  * Used for driver unload.
6437  */
static void cik_irq_fini(struct radeon_device *rdev)
{
        /* quiesce the hardware first, then release the IH ring buffer */
        cik_irq_suspend(rdev);
        r600_ih_ring_fini(rdev);
}
6443
6444 /**
6445  * cik_get_ih_wptr - get the IH ring buffer wptr
6446  *
6447  * @rdev: radeon_device pointer
6448  *
6449  * Get the IH ring buffer wptr from either the register
6450  * or the writeback memory buffer (CIK).  Also check for
6451  * ring buffer overflow and deal with it.
6452  * Used by cik_irq_process().
6453  * Returns the value of the wptr.
6454  */
6455 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6456 {
6457         u32 wptr, tmp;
6458
6459         if (rdev->wb.enabled)
6460                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6461         else
6462                 wptr = RREG32(IH_RB_WPTR);
6463
6464         if (wptr & RB_OVERFLOW) {
6465                 /* When a ring buffer overflow happen start parsing interrupt
6466                  * from the last not overwritten vector (wptr + 16). Hopefully
6467                  * this should allow us to catchup.
6468                  */
6469                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6470                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6471                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6472                 tmp = RREG32(IH_RB_CNTL);
6473                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6474                 WREG32(IH_RB_CNTL, tmp);
6475         }
6476         return (wptr & rdev->ih.ptr_mask);
6477 }
6478
6479 /*        CIK IV Ring
6480  * Each IV ring entry is 128 bits:
6481  * [7:0]    - interrupt source id
6482  * [31:8]   - reserved
6483  * [59:32]  - interrupt source data
6484  * [63:60]  - reserved
6485  * [71:64]  - RINGID
6486  *            CP:
6487  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6488  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6489  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6490  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6491  *            PIPE_ID - ME0 0=3D
6492  *                    - ME1&2 compute dispatcher (4 pipes each)
6493  *            SDMA:
6494  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6495  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6496  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6497  * [79:72]  - VMID
6498  * [95:80]  - PASID
6499  * [127:96] - reserved
6500  */
6501 /**
6502  * cik_irq_process - interrupt handler
6503  *
6504  * @rdev: radeon_device pointer
6505  *
6506  * Interrupt hander (CIK).  Walk the IH ring,
6507  * ack interrupts and schedule work to handle
6508  * interrupt events.
6509  * Returns irq process return code.
6510  */
6511 int cik_irq_process(struct radeon_device *rdev)
6512 {
6513         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6514         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6515         u32 wptr;
6516         u32 rptr;
6517         u32 src_id, src_data, ring_id;
6518         u8 me_id, pipe_id, queue_id;
6519         u32 ring_index;
6520         bool queue_hotplug = false;
6521         bool queue_reset = false;
6522         u32 addr, status, mc_client;
6523         bool queue_thermal = false;
6524
6525         if (!rdev->ih.enabled || rdev->shutdown)
6526                 return IRQ_NONE;
6527
6528         wptr = cik_get_ih_wptr(rdev);
6529
6530 restart_ih:
6531         /* is somebody else already processing irqs? */
6532         if (atomic_xchg(&rdev->ih.lock, 1))
6533                 return IRQ_NONE;
6534
6535         rptr = rdev->ih.rptr;
6536         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6537
6538         /* Order reading of wptr vs. reading of IH ring data */
6539         rmb();
6540
6541         /* display interrupts */
6542         cik_irq_ack(rdev);
6543
6544         while (rptr != wptr) {
6545                 /* wptr/rptr are in bytes! */
6546                 ring_index = rptr / 4;
6547                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6548                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6549                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6550
6551                 switch (src_id) {
6552                 case 1: /* D1 vblank/vline */
6553                         switch (src_data) {
6554                         case 0: /* D1 vblank */
6555                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6556                                         if (rdev->irq.crtc_vblank_int[0]) {
6557                                                 drm_handle_vblank(rdev->ddev, 0);
6558                                                 rdev->pm.vblank_sync = true;
6559                                                 wake_up(&rdev->irq.vblank_queue);
6560                                         }
6561                                         if (atomic_read(&rdev->irq.pflip[0]))
6562                                                 radeon_crtc_handle_flip(rdev, 0);
6563                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6564                                         DRM_DEBUG("IH: D1 vblank\n");
6565                                 }
6566                                 break;
6567                         case 1: /* D1 vline */
6568                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6569                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6570                                         DRM_DEBUG("IH: D1 vline\n");
6571                                 }
6572                                 break;
6573                         default:
6574                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6575                                 break;
6576                         }
6577                         break;
6578                 case 2: /* D2 vblank/vline */
6579                         switch (src_data) {
6580                         case 0: /* D2 vblank */
6581                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6582                                         if (rdev->irq.crtc_vblank_int[1]) {
6583                                                 drm_handle_vblank(rdev->ddev, 1);
6584                                                 rdev->pm.vblank_sync = true;
6585                                                 wake_up(&rdev->irq.vblank_queue);
6586                                         }
6587                                         if (atomic_read(&rdev->irq.pflip[1]))
6588                                                 radeon_crtc_handle_flip(rdev, 1);
6589                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6590                                         DRM_DEBUG("IH: D2 vblank\n");
6591                                 }
6592                                 break;
6593                         case 1: /* D2 vline */
6594                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6595                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6596                                         DRM_DEBUG("IH: D2 vline\n");
6597                                 }
6598                                 break;
6599                         default:
6600                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6601                                 break;
6602                         }
6603                         break;
6604                 case 3: /* D3 vblank/vline */
6605                         switch (src_data) {
6606                         case 0: /* D3 vblank */
6607                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6608                                         if (rdev->irq.crtc_vblank_int[2]) {
6609                                                 drm_handle_vblank(rdev->ddev, 2);
6610                                                 rdev->pm.vblank_sync = true;
6611                                                 wake_up(&rdev->irq.vblank_queue);
6612                                         }
6613                                         if (atomic_read(&rdev->irq.pflip[2]))
6614                                                 radeon_crtc_handle_flip(rdev, 2);
6615                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6616                                         DRM_DEBUG("IH: D3 vblank\n");
6617                                 }
6618                                 break;
6619                         case 1: /* D3 vline */
6620                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6621                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6622                                         DRM_DEBUG("IH: D3 vline\n");
6623                                 }
6624                                 break;
6625                         default:
6626                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6627                                 break;
6628                         }
6629                         break;
6630                 case 4: /* D4 vblank/vline */
6631                         switch (src_data) {
6632                         case 0: /* D4 vblank */
6633                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6634                                         if (rdev->irq.crtc_vblank_int[3]) {
6635                                                 drm_handle_vblank(rdev->ddev, 3);
6636                                                 rdev->pm.vblank_sync = true;
6637                                                 wake_up(&rdev->irq.vblank_queue);
6638                                         }
6639                                         if (atomic_read(&rdev->irq.pflip[3]))
6640                                                 radeon_crtc_handle_flip(rdev, 3);
6641                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6642                                         DRM_DEBUG("IH: D4 vblank\n");
6643                                 }
6644                                 break;
6645                         case 1: /* D4 vline */
6646                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6647                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6648                                         DRM_DEBUG("IH: D4 vline\n");
6649                                 }
6650                                 break;
6651                         default:
6652                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6653                                 break;
6654                         }
6655                         break;
6656                 case 5: /* D5 vblank/vline */
6657                         switch (src_data) {
6658                         case 0: /* D5 vblank */
6659                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6660                                         if (rdev->irq.crtc_vblank_int[4]) {
6661                                                 drm_handle_vblank(rdev->ddev, 4);
6662                                                 rdev->pm.vblank_sync = true;
6663                                                 wake_up(&rdev->irq.vblank_queue);
6664                                         }
6665                                         if (atomic_read(&rdev->irq.pflip[4]))
6666                                                 radeon_crtc_handle_flip(rdev, 4);
6667                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6668                                         DRM_DEBUG("IH: D5 vblank\n");
6669                                 }
6670                                 break;
6671                         case 1: /* D5 vline */
6672                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6673                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6674                                         DRM_DEBUG("IH: D5 vline\n");
6675                                 }
6676                                 break;
6677                         default:
6678                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6679                                 break;
6680                         }
6681                         break;
6682                 case 6: /* D6 vblank/vline */
6683                         switch (src_data) {
6684                         case 0: /* D6 vblank */
6685                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6686                                         if (rdev->irq.crtc_vblank_int[5]) {
6687                                                 drm_handle_vblank(rdev->ddev, 5);
6688                                                 rdev->pm.vblank_sync = true;
6689                                                 wake_up(&rdev->irq.vblank_queue);
6690                                         }
6691                                         if (atomic_read(&rdev->irq.pflip[5]))
6692                                                 radeon_crtc_handle_flip(rdev, 5);
6693                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6694                                         DRM_DEBUG("IH: D6 vblank\n");
6695                                 }
6696                                 break;
6697                         case 1: /* D6 vline */
6698                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6699                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6700                                         DRM_DEBUG("IH: D6 vline\n");
6701                                 }
6702                                 break;
6703                         default:
6704                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6705                                 break;
6706                         }
6707                         break;
6708                 case 42: /* HPD hotplug */
6709                         switch (src_data) {
6710                         case 0:
6711                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6712                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6713                                         queue_hotplug = true;
6714                                         DRM_DEBUG("IH: HPD1\n");
6715                                 }
6716                                 break;
6717                         case 1:
6718                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6719                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6720                                         queue_hotplug = true;
6721                                         DRM_DEBUG("IH: HPD2\n");
6722                                 }
6723                                 break;
6724                         case 2:
6725                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6726                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6727                                         queue_hotplug = true;
6728                                         DRM_DEBUG("IH: HPD3\n");
6729                                 }
6730                                 break;
6731                         case 3:
6732                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6733                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6734                                         queue_hotplug = true;
6735                                         DRM_DEBUG("IH: HPD4\n");
6736                                 }
6737                                 break;
6738                         case 4:
6739                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6740                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6741                                         queue_hotplug = true;
6742                                         DRM_DEBUG("IH: HPD5\n");
6743                                 }
6744                                 break;
6745                         case 5:
6746                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6747                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6748                                         queue_hotplug = true;
6749                                         DRM_DEBUG("IH: HPD6\n");
6750                                 }
6751                                 break;
6752                         default:
6753                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6754                                 break;
6755                         }
6756                         break;
6757                 case 124: /* UVD */
6758                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6759                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6760                         break;
6761                 case 146:
6762                 case 147:
6763                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6764                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6765                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6766                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6767                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6768                                 addr);
6769                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6770                                 status);
6771                         cik_vm_decode_fault(rdev, status, addr, mc_client);
6772                         /* reset addr and status */
6773                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6774                         break;
6775                 case 176: /* GFX RB CP_INT */
6776                 case 177: /* GFX IB CP_INT */
6777                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6778                         break;
6779                 case 181: /* CP EOP event */
6780                         DRM_DEBUG("IH: CP EOP\n");
6781                         /* XXX check the bitfield order! */
6782                         me_id = (ring_id & 0x60) >> 5;
6783                         pipe_id = (ring_id & 0x18) >> 3;
6784                         queue_id = (ring_id & 0x7) >> 0;
6785                         switch (me_id) {
6786                         case 0:
6787                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6788                                 break;
6789                         case 1:
6790                         case 2:
6791                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6792                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6793                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6794                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6795                                 break;
6796                         }
6797                         break;
6798                 case 184: /* CP Privileged reg access */
6799                         DRM_ERROR("Illegal register access in command stream\n");
6800                         /* XXX check the bitfield order! */
6801                         me_id = (ring_id & 0x60) >> 5;
6802                         pipe_id = (ring_id & 0x18) >> 3;
6803                         queue_id = (ring_id & 0x7) >> 0;
6804                         switch (me_id) {
6805                         case 0:
6806                                 /* This results in a full GPU reset, but all we need to do is soft
6807                                  * reset the CP for gfx
6808                                  */
6809                                 queue_reset = true;
6810                                 break;
6811                         case 1:
6812                                 /* XXX compute */
6813                                 queue_reset = true;
6814                                 break;
6815                         case 2:
6816                                 /* XXX compute */
6817                                 queue_reset = true;
6818                                 break;
6819                         }
6820                         break;
6821                 case 185: /* CP Privileged inst */
6822                         DRM_ERROR("Illegal instruction in command stream\n");
6823                         /* XXX check the bitfield order! */
6824                         me_id = (ring_id & 0x60) >> 5;
6825                         pipe_id = (ring_id & 0x18) >> 3;
6826                         queue_id = (ring_id & 0x7) >> 0;
6827                         switch (me_id) {
6828                         case 0:
6829                                 /* This results in a full GPU reset, but all we need to do is soft
6830                                  * reset the CP for gfx
6831                                  */
6832                                 queue_reset = true;
6833                                 break;
6834                         case 1:
6835                                 /* XXX compute */
6836                                 queue_reset = true;
6837                                 break;
6838                         case 2:
6839                                 /* XXX compute */
6840                                 queue_reset = true;
6841                                 break;
6842                         }
6843                         break;
6844                 case 224: /* SDMA trap event */
6845                         /* XXX check the bitfield order! */
6846                         me_id = (ring_id & 0x3) >> 0;
6847                         queue_id = (ring_id & 0xc) >> 2;
6848                         DRM_DEBUG("IH: SDMA trap\n");
6849                         switch (me_id) {
6850                         case 0:
6851                                 switch (queue_id) {
6852                                 case 0:
6853                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6854                                         break;
6855                                 case 1:
6856                                         /* XXX compute */
6857                                         break;
6858                                 case 2:
6859                                         /* XXX compute */
6860                                         break;
6861                                 }
6862                                 break;
6863                         case 1:
6864                                 switch (queue_id) {
6865                                 case 0:
6866                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6867                                         break;
6868                                 case 1:
6869                                         /* XXX compute */
6870                                         break;
6871                                 case 2:
6872                                         /* XXX compute */
6873                                         break;
6874                                 }
6875                                 break;
6876                         }
6877                         break;
6878                 case 230: /* thermal low to high */
6879                         DRM_DEBUG("IH: thermal low to high\n");
6880                         rdev->pm.dpm.thermal.high_to_low = false;
6881                         queue_thermal = true;
6882                         break;
6883                 case 231: /* thermal high to low */
6884                         DRM_DEBUG("IH: thermal high to low\n");
6885                         rdev->pm.dpm.thermal.high_to_low = true;
6886                         queue_thermal = true;
6887                         break;
6888                 case 233: /* GUI IDLE */
6889                         DRM_DEBUG("IH: GUI idle\n");
6890                         break;
6891                 case 241: /* SDMA Privileged inst */
6892                 case 247: /* SDMA Privileged inst */
6893                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
6894                         /* XXX check the bitfield order! */
6895                         me_id = (ring_id & 0x3) >> 0;
6896                         queue_id = (ring_id & 0xc) >> 2;
6897                         switch (me_id) {
6898                         case 0:
6899                                 switch (queue_id) {
6900                                 case 0:
6901                                         queue_reset = true;
6902                                         break;
6903                                 case 1:
6904                                         /* XXX compute */
6905                                         queue_reset = true;
6906                                         break;
6907                                 case 2:
6908                                         /* XXX compute */
6909                                         queue_reset = true;
6910                                         break;
6911                                 }
6912                                 break;
6913                         case 1:
6914                                 switch (queue_id) {
6915                                 case 0:
6916                                         queue_reset = true;
6917                                         break;
6918                                 case 1:
6919                                         /* XXX compute */
6920                                         queue_reset = true;
6921                                         break;
6922                                 case 2:
6923                                         /* XXX compute */
6924                                         queue_reset = true;
6925                                         break;
6926                                 }
6927                                 break;
6928                         }
6929                         break;
6930                 default:
6931                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6932                         break;
6933                 }
6934
6935                 /* wptr/rptr are in bytes! */
6936                 rptr += 16;
6937                 rptr &= rdev->ih.ptr_mask;
6938         }
6939         if (queue_hotplug)
6940                 schedule_work(&rdev->hotplug_work);
6941         if (queue_reset)
6942                 schedule_work(&rdev->reset_work);
6943         if (queue_thermal)
6944                 schedule_work(&rdev->pm.dpm.thermal.work);
6945         rdev->ih.rptr = rptr;
6946         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6947         atomic_set(&rdev->ih.lock, 0);
6948
6949         /* make sure wptr hasn't changed while processing */
6950         wptr = cik_get_ih_wptr(rdev);
6951         if (wptr != rptr)
6952                 goto restart_ih;
6953
6954         return IRQ_HANDLED;
6955 }
6956
6957 /*
6958  * startup/shutdown callbacks
6959  */
6960 /**
6961  * cik_startup - program the asic to a functional state
6962  *
6963  * @rdev: radeon_device pointer
6964  *
6965  * Programs the asic to a functional state (CIK).
6966  * Called by cik_init() and cik_resume().
6967  * Returns 0 for success, error for failure.
6968  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* load ucode if any image is missing; IGPs have no MC firmware,
	 * dGPUs additionally require and load the MC microcode
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* KV (Spectre) and KB (Kalindi) use different save/restore lists */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on each ring: gfx, two compute,
	 * two SDMA, then UVD
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: on any error the UVD ring is
	 * disabled by zeroing its size below
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD bring-up failed above; skip it */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7194
7195 /**
7196  * cik_resume - resume the asic to a functional state
7197  *
7198  * @rdev: radeon_device pointer
7199  *
7200  * Programs the asic to a functional state (CIK).
7201  * Called at resume.
7202  * Returns 0 for success, error for failure.
7203  */
7204 int cik_resume(struct radeon_device *rdev)
7205 {
7206         int r;
7207
7208         /* post card */
7209         atom_asic_init(rdev->mode_info.atom_context);
7210
7211         /* init golden registers */
7212         cik_init_golden_registers(rdev);
7213
7214         rdev->accel_working = true;
7215         r = cik_startup(rdev);
7216         if (r) {
7217                 DRM_ERROR("cik startup failed on resume\n");
7218                 rdev->accel_working = false;
7219                 return r;
7220         }
7221
7222         return r;
7223
7224 }
7225
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop display audio and the VM manager first */
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines so no new work is processed */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* shut down UVD and save its state */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* tear down powergating and clockgating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	/* quiesce interrupts and writeback, then disable the GART */
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7250
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call the asic specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
7257 /**
7258  * cik_init - asic specific driver and hw init
7259  *
7260  * @rdev: radeon_device pointer
7261  *
7262  * Setup asic specific driver variables and program the hw
7263  * to a functional state (CIK).
7264  * Called at driver startup.
7265  * Returns 0 for success, errors for failure.
7266  */
7267 int cik_init(struct radeon_device *rdev)
7268 {
7269         struct radeon_ring *ring;
7270         int r;
7271
7272         /* Read BIOS */
7273         if (!radeon_get_bios(rdev)) {
7274                 if (ASIC_IS_AVIVO(rdev))
7275                         return -EINVAL;
7276         }
7277         /* Must be an ATOMBIOS */
7278         if (!rdev->is_atom_bios) {
7279                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7280                 return -EINVAL;
7281         }
7282         r = radeon_atombios_init(rdev);
7283         if (r)
7284                 return r;
7285
7286         /* Post card if necessary */
7287         if (!radeon_card_posted(rdev)) {
7288                 if (!rdev->bios) {
7289                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7290                         return -EINVAL;
7291                 }
7292                 DRM_INFO("GPU not posted. posting now...\n");
7293                 atom_asic_init(rdev->mode_info.atom_context);
7294         }
7295         /* init golden registers */
7296         cik_init_golden_registers(rdev);
7297         /* Initialize scratch registers */
7298         cik_scratch_init(rdev);
7299         /* Initialize surface registers */
7300         radeon_surface_init(rdev);
7301         /* Initialize clocks */
7302         radeon_get_clock_info(rdev->ddev);
7303
7304         /* Fence driver */
7305         r = radeon_fence_driver_init(rdev);
7306         if (r)
7307                 return r;
7308
7309         /* initialize memory controller */
7310         r = cik_mc_init(rdev);
7311         if (r)
7312                 return r;
7313         /* Memory manager */
7314         r = radeon_bo_init(rdev);
7315         if (r)
7316                 return r;
7317
7318         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7319         ring->ring_obj = NULL;
7320         r600_ring_init(rdev, ring, 1024 * 1024);
7321
7322         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7323         ring->ring_obj = NULL;
7324         r600_ring_init(rdev, ring, 1024 * 1024);
7325         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7326         if (r)
7327                 return r;
7328
7329         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7330         ring->ring_obj = NULL;
7331         r600_ring_init(rdev, ring, 1024 * 1024);
7332         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7333         if (r)
7334                 return r;
7335
7336         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7337         ring->ring_obj = NULL;
7338         r600_ring_init(rdev, ring, 256 * 1024);
7339
7340         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7341         ring->ring_obj = NULL;
7342         r600_ring_init(rdev, ring, 256 * 1024);
7343
7344         r = radeon_uvd_init(rdev);
7345         if (!r) {
7346                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7347                 ring->ring_obj = NULL;
7348                 r600_ring_init(rdev, ring, 4096);
7349         }
7350
7351         rdev->ih.ring_obj = NULL;
7352         r600_ih_ring_init(rdev, 64 * 1024);
7353
7354         r = r600_pcie_gart_init(rdev);
7355         if (r)
7356                 return r;
7357
7358         rdev->accel_working = true;
7359         r = cik_startup(rdev);
7360         if (r) {
7361                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7362                 cik_cp_fini(rdev);
7363                 cik_sdma_fini(rdev);
7364                 cik_irq_fini(rdev);
7365                 sumo_rlc_fini(rdev);
7366                 cik_mec_fini(rdev);
7367                 radeon_wb_fini(rdev);
7368                 radeon_ib_pool_fini(rdev);
7369                 radeon_vm_manager_fini(rdev);
7370                 radeon_irq_kms_fini(rdev);
7371                 cik_pcie_gart_fini(rdev);
7372                 rdev->accel_working = false;
7373         }
7374
7375         /* Don't start up if the MC ucode is missing.
7376          * The default clocks and voltages before the MC ucode
7377          * is loaded are not suffient for advanced operations.
7378          */
7379         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7380                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7381                 return -EINVAL;
7382         }
7383
7384         return 0;
7385 }
7386
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	/* stop the command processors and DMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable powergating and clockgating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	/* tear down interrupt, RLC and MEC state */
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	/* release writeback, VM, IB pool and KMS IRQ resources */
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* shut down UVD, then the GART and VRAM scratch page */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	/* release GEM objects, fences and the memory manager */
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7420
7421 /* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	/* per-pipe DMIF buffer control registers are 0x20 apart */
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* tmp selects the LB_MEMORY_CONFIG partitioning,
		 * buffer_alloc the number of DMIF buffers to request */
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			/* IGPs get fewer DMIF buffers than dGPUs */
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hw acknowledges the buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* report the usable LB size in pixels for the chosen config */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
7495
7496 /**
7497  * cik_get_number_of_dram_channels - get the number of dram channels
7498  *
7499  * @rdev: radeon_device pointer
7500  *
7501  * Look up the number of video ram channels (CIK).
7502  * Used for display watermark bandwidth calculations
7503  * Returns the number of dram channels
7504  */
7505 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7506 {
7507         u32 tmp = RREG32(MC_SHARED_CHMAP);
7508
7509         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7510         case 0:
7511         default:
7512                 return 1;
7513         case 1:
7514                 return 2;
7515         case 2:
7516                 return 4;
7517         case 3:
7518                 return 8;
7519         case 4:
7520                 return 3;
7521         case 5:
7522                 return 6;
7523         case 6:
7524                 return 10;
7525         case 7:
7526                 return 12;
7527         case 8:
7528                 return 16;
7529         }
7530 }
7531
/**
 * struct dce8_wm_params - display watermark calculation parameters
 *
 * Collects the clock, mode timing and line buffer information the
 * dce8_* watermark helpers need; filled in by dce8_program_watermarks().
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7547
7548 /**
7549  * dce8_dram_bandwidth - get the dram bandwidth
7550  *
7551  * @wm: watermark calculation data
7552  *
7553  * Calculate the raw dram bandwidth (CIK).
7554  * Used for display watermark bandwidth calculations
7555  * Returns the dram bandwidth in MBytes/s
7556  */
7557 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7558 {
7559         /* Calculate raw DRAM Bandwidth */
7560         fixed20_12 dram_efficiency; /* 0.7 */
7561         fixed20_12 yclk, dram_channels, bandwidth;
7562         fixed20_12 a;
7563
7564         a.full = dfixed_const(1000);
7565         yclk.full = dfixed_const(wm->yclk);
7566         yclk.full = dfixed_div(yclk, a);
7567         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7568         a.full = dfixed_const(10);
7569         dram_efficiency.full = dfixed_const(7);
7570         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7571         bandwidth.full = dfixed_mul(dram_channels, yclk);
7572         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7573
7574         return dfixed_trunc(bandwidth);
7575 }
7576
7577 /**
7578  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7579  *
7580  * @wm: watermark calculation data
7581  *
7582  * Calculate the dram bandwidth used for display (CIK).
7583  * Used for display watermark bandwidth calculations
7584  * Returns the dram bandwidth for display in MBytes/s
7585  */
7586 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7587 {
7588         /* Calculate DRAM Bandwidth and the part allocated to display. */
7589         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7590         fixed20_12 yclk, dram_channels, bandwidth;
7591         fixed20_12 a;
7592
7593         a.full = dfixed_const(1000);
7594         yclk.full = dfixed_const(wm->yclk);
7595         yclk.full = dfixed_div(yclk, a);
7596         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7597         a.full = dfixed_const(10);
7598         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7599         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7600         bandwidth.full = dfixed_mul(dram_channels, yclk);
7601         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7602
7603         return dfixed_trunc(bandwidth);
7604 }
7605
7606 /**
7607  * dce8_data_return_bandwidth - get the data return bandwidth
7608  *
7609  * @wm: watermark calculation data
7610  *
7611  * Calculate the data return bandwidth used for display (CIK).
7612  * Used for display watermark bandwidth calculations
7613  * Returns the data return bandwidth in MBytes/s
7614  */
7615 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7616 {
7617         /* Calculate the display Data return Bandwidth */
7618         fixed20_12 return_efficiency; /* 0.8 */
7619         fixed20_12 sclk, bandwidth;
7620         fixed20_12 a;
7621
7622         a.full = dfixed_const(1000);
7623         sclk.full = dfixed_const(wm->sclk);
7624         sclk.full = dfixed_div(sclk, a);
7625         a.full = dfixed_const(10);
7626         return_efficiency.full = dfixed_const(8);
7627         return_efficiency.full = dfixed_div(return_efficiency, a);
7628         a.full = dfixed_const(32);
7629         bandwidth.full = dfixed_mul(a, sclk);
7630         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7631
7632         return dfixed_trunc(bandwidth);
7633 }
7634
7635 /**
7636  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7637  *
7638  * @wm: watermark calculation data
7639  *
7640  * Calculate the dmif bandwidth used for display (CIK).
7641  * Used for display watermark bandwidth calculations
7642  * Returns the dmif bandwidth in MBytes/s
7643  */
7644 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7645 {
7646         /* Calculate the DMIF Request Bandwidth */
7647         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7648         fixed20_12 disp_clk, bandwidth;
7649         fixed20_12 a, b;
7650
7651         a.full = dfixed_const(1000);
7652         disp_clk.full = dfixed_const(wm->disp_clk);
7653         disp_clk.full = dfixed_div(disp_clk, a);
7654         a.full = dfixed_const(32);
7655         b.full = dfixed_mul(a, disp_clk);
7656
7657         a.full = dfixed_const(10);
7658         disp_clk_request_efficiency.full = dfixed_const(8);
7659         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7660
7661         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7662
7663         return dfixed_trunc(bandwidth);
7664 }
7665
7666 /**
7667  * dce8_available_bandwidth - get the min available bandwidth
7668  *
7669  * @wm: watermark calculation data
7670  *
7671  * Calculate the min available bandwidth used for display (CIK).
7672  * Used for display watermark bandwidth calculations
7673  * Returns the min available bandwidth in MBytes/s
7674  */
7675 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7676 {
7677         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7678         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7679         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7680         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7681
7682         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7683 }
7684
7685 /**
7686  * dce8_average_bandwidth - get the average available bandwidth
7687  *
7688  * @wm: watermark calculation data
7689  *
7690  * Calculate the average available bandwidth used for display (CIK).
7691  * Used for display watermark bandwidth calculations
7692  * Returns the average available bandwidth in MBytes/s
7693  */
7694 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7695 {
7696         /* Calculate the display mode Average Bandwidth
7697          * DisplayMode should contain the source and destination dimensions,
7698          * timing, etc.
7699          */
7700         fixed20_12 bpp;
7701         fixed20_12 line_time;
7702         fixed20_12 src_width;
7703         fixed20_12 bandwidth;
7704         fixed20_12 a;
7705
7706         a.full = dfixed_const(1000);
7707         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7708         line_time.full = dfixed_div(line_time, a);
7709         bpp.full = dfixed_const(wm->bytes_per_pixel);
7710         src_width.full = dfixed_const(wm->src_width);
7711         bandwidth.full = dfixed_mul(src_width, bpp);
7712         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7713         bandwidth.full = dfixed_div(bandwidth, line_time);
7714
7715         return dfixed_trunc(bandwidth);
7716 }
7717
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): the next three initializers divide by
	 * available_bandwidth and disp_clk BEFORE the num_heads == 0
	 * guard below runs; callers presumably guarantee both are
	 * non-zero - verify */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaled or interlaced modes need more source lines per dest line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size divided by the mc_latency + 512 return window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk(MHz) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is bounded by both limits above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one output line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills within the active period the raw latency
	 * stands; otherwise add the shortfall */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
7789
7790 /**
7791  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7792  * average and available dram bandwidth
7793  *
7794  * @wm: watermark calculation data
7795  *
7796  * Check if the display average bandwidth fits in the display
7797  * dram bandwidth (CIK).
7798  * Used for display watermark bandwidth calculations
7799  * Returns true if the display fits, false if not.
7800  */
7801 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7802 {
7803         if (dce8_average_bandwidth(wm) <=
7804             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7805                 return true;
7806         else
7807                 return false;
7808 }
7809
7810 /**
7811  * dce8_average_bandwidth_vs_available_bandwidth - check
7812  * average and available bandwidth
7813  *
7814  * @wm: watermark calculation data
7815  *
7816  * Check if the display average bandwidth fits in the display
7817  * available bandwidth (CIK).
7818  * Used for display watermark bandwidth calculations
7819  * Returns true if the display fits, false if not.
7820  */
7821 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7822 {
7823         if (dce8_average_bandwidth(wm) <=
7824             (dce8_available_bandwidth(wm) / wm->num_heads))
7825                 return true;
7826         else
7827                 return false;
7828 }
7829
7830 /**
7831  * dce8_check_latency_hiding - check latency hiding
7832  *
7833  * @wm: watermark calculation data
7834  *
7835  * Check latency hiding (CIK).
7836  * Used for display watermark bandwidth calculations
7837  * Returns true if the display fits, false if not.
7838  */
7839 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7840 {
7841         u32 lb_partitions = wm->lb_size / wm->src_width;
7842         u32 line_time = wm->active_time + wm->blank_time;
7843         u32 latency_tolerant_lines;
7844         u32 latency_hiding;
7845         fixed20_12 a;
7846
7847         a.full = dfixed_const(1);
7848         if (wm->vsc.full > a.full)
7849                 latency_tolerant_lines = 1;
7850         else {
7851                 if (lb_partitions <= (wm->vtaps + 1))
7852                         latency_tolerant_lines = 1;
7853                 else
7854                         latency_tolerant_lines = 2;
7855         }
7856
7857         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7858
7859         if (dce8_latency_watermark(wm) <= latency_hiding)
7860                 return true;
7861         else
7862                 return false;
7863 }
7864
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, capped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* query dpm for the non-low (high) mclk/sclk */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* query dpm for the low mclk/sclk */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
7997
7998 /**
7999  * dce8_bandwidth_update - program display watermarks
8000  *
8001  * @rdev: radeon_device pointer
8002  *
8003  * Calculate and program the display watermarks and line
8004  * buffer allocation (CIK).
8005  */
8006 void dce8_bandwidth_update(struct radeon_device *rdev)
8007 {
8008         struct drm_display_mode *mode = NULL;
8009         u32 num_heads = 0, lb_size;
8010         int i;
8011
8012         radeon_update_display_priority(rdev);
8013
8014         for (i = 0; i < rdev->num_crtc; i++) {
8015                 if (rdev->mode_info.crtcs[i]->base.enabled)
8016                         num_heads++;
8017         }
8018         for (i = 0; i < rdev->num_crtc; i++) {
8019                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8020                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8021                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8022         }
8023 }
8024
8025 /**
8026  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8027  *
8028  * @rdev: radeon_device pointer
8029  *
8030  * Fetches a GPU clock counter snapshot (SI).
8031  * Returns the 64 bit clock counter snapshot.
8032  */
8033 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8034 {
8035         uint64_t clock;
8036
8037         mutex_lock(&rdev->gpu_clock_mutex);
8038         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8039         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8040                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8041         mutex_unlock(&rdev->gpu_clock_mutex);
8042         return clock;
8043 }
8044
8045 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8046                               u32 cntl_reg, u32 status_reg)
8047 {
8048         int r, i;
8049         struct atom_clock_dividers dividers;
8050         uint32_t tmp;
8051
8052         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8053                                            clock, false, &dividers);
8054         if (r)
8055                 return r;
8056
8057         tmp = RREG32_SMC(cntl_reg);
8058         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8059         tmp |= dividers.post_divider;
8060         WREG32_SMC(cntl_reg, tmp);
8061
8062         for (i = 0; i < 100; i++) {
8063                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8064                         break;
8065                 mdelay(10);
8066         }
8067         if (i == 100)
8068                 return -ETIMEDOUT;
8069
8070         return 0;
8071 }
8072
8073 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8074 {
8075         int r = 0;
8076
8077         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8078         if (r)
8079                 return r;
8080
8081         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8082         return r;
8083 }
8084
8085 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8086 {
8087         struct pci_dev *root = rdev->pdev->bus->self;
8088         int bridge_pos, gpu_pos;
8089         u32 speed_cntl, mask, current_data_rate;
8090         int ret, i;
8091         u16 tmp16;
8092
8093         if (radeon_pcie_gen2 == 0)
8094                 return;
8095
8096         if (rdev->flags & RADEON_IS_IGP)
8097                 return;
8098
8099         if (!(rdev->flags & RADEON_IS_PCIE))
8100                 return;
8101
8102         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8103         if (ret != 0)
8104                 return;
8105
8106         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8107                 return;
8108
8109         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8110         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8111                 LC_CURRENT_DATA_RATE_SHIFT;
8112         if (mask & DRM_PCIE_SPEED_80) {
8113                 if (current_data_rate == 2) {
8114                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8115                         return;
8116                 }
8117                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8118         } else if (mask & DRM_PCIE_SPEED_50) {
8119                 if (current_data_rate == 1) {
8120                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8121                         return;
8122                 }
8123                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8124         }
8125
8126         bridge_pos = pci_pcie_cap(root);
8127         if (!bridge_pos)
8128                 return;
8129
8130         gpu_pos = pci_pcie_cap(rdev->pdev);
8131         if (!gpu_pos)
8132                 return;
8133
8134         if (mask & DRM_PCIE_SPEED_80) {
8135                 /* re-try equalization if gen3 is not already enabled */
8136                 if (current_data_rate != 2) {
8137                         u16 bridge_cfg, gpu_cfg;
8138                         u16 bridge_cfg2, gpu_cfg2;
8139                         u32 max_lw, current_lw, tmp;
8140
8141                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8142                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8143
8144                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8145                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8146
8147                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8148                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8149
8150                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8151                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8152                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8153
8154                         if (current_lw < max_lw) {
8155                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8156                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8157                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8158                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8159                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8160                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8161                                 }
8162                         }
8163
8164                         for (i = 0; i < 10; i++) {
8165                                 /* check status */
8166                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8167                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8168                                         break;
8169
8170                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8171                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8172
8173                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8174                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8175
8176                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8177                                 tmp |= LC_SET_QUIESCE;
8178                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8179
8180                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8181                                 tmp |= LC_REDO_EQ;
8182                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8183
8184                                 mdelay(100);
8185
8186                                 /* linkctl */
8187                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8188                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8189                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8190                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8191
8192                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8193                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8194                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8195                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8196
8197                                 /* linkctl2 */
8198                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8199                                 tmp16 &= ~((1 << 4) | (7 << 9));
8200                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8201                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8202
8203                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8204                                 tmp16 &= ~((1 << 4) | (7 << 9));
8205                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8206                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8207
8208                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8209                                 tmp &= ~LC_SET_QUIESCE;
8210                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8211                         }
8212                 }
8213         }
8214
8215         /* set the link speed */
8216         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8217         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8218         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8219
8220         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8221         tmp16 &= ~0xf;
8222         if (mask & DRM_PCIE_SPEED_80)
8223                 tmp16 |= 3; /* gen3 */
8224         else if (mask & DRM_PCIE_SPEED_50)
8225                 tmp16 |= 2; /* gen2 */
8226         else
8227                 tmp16 |= 1; /* gen1 */
8228         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8229
8230         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8231         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8232         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8233
8234         for (i = 0; i < rdev->usec_timeout; i++) {
8235                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8236                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8237                         break;
8238                 udelay(1);
8239         }
8240 }
8241
8242 static void cik_program_aspm(struct radeon_device *rdev)
8243 {
8244         u32 data, orig;
8245         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8246         bool disable_clkreq = false;
8247
8248         if (radeon_aspm == 0)
8249                 return;
8250
8251         /* XXX double check IGPs */
8252         if (rdev->flags & RADEON_IS_IGP)
8253                 return;
8254
8255         if (!(rdev->flags & RADEON_IS_PCIE))
8256                 return;
8257
8258         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8259         data &= ~LC_XMIT_N_FTS_MASK;
8260         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8261         if (orig != data)
8262                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8263
8264         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8265         data |= LC_GO_TO_RECOVERY;
8266         if (orig != data)
8267                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8268
8269         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8270         data |= P_IGNORE_EDB_ERR;
8271         if (orig != data)
8272                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8273
8274         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8275         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8276         data |= LC_PMI_TO_L1_DIS;
8277         if (!disable_l0s)
8278                 data |= LC_L0S_INACTIVITY(7);
8279
8280         if (!disable_l1) {
8281                 data |= LC_L1_INACTIVITY(7);
8282                 data &= ~LC_PMI_TO_L1_DIS;
8283                 if (orig != data)
8284                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8285
8286                 if (!disable_plloff_in_l1) {
8287                         bool clk_req_support;
8288
8289                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8290                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8291                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8292                         if (orig != data)
8293                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8294
8295                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8296                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8297                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8298                         if (orig != data)
8299                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8300
8301                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8302                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8303                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8304                         if (orig != data)
8305                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8306
8307                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8308                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8309                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8310                         if (orig != data)
8311                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8312
8313                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8314                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8315                         data |= LC_DYN_LANES_PWR_STATE(3);
8316                         if (orig != data)
8317                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8318
8319                         if (!disable_clkreq) {
8320                                 struct pci_dev *root = rdev->pdev->bus->self;
8321                                 u32 lnkcap;
8322
8323                                 clk_req_support = false;
8324                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8325                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8326                                         clk_req_support = true;
8327                         } else {
8328                                 clk_req_support = false;
8329                         }
8330
8331                         if (clk_req_support) {
8332                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8333                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8334                                 if (orig != data)
8335                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8336
8337                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
8338                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8339                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8340                                 if (orig != data)
8341                                         WREG32_SMC(THM_CLK_CNTL, data);
8342
8343                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8344                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8345                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8346                                 if (orig != data)
8347                                         WREG32_SMC(MISC_CLK_CTRL, data);
8348
8349                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8350                                 data &= ~BCLK_AS_XCLK;
8351                                 if (orig != data)
8352                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
8353
8354                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8355                                 data &= ~FORCE_BIF_REFCLK_EN;
8356                                 if (orig != data)
8357                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8358
8359                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8360                                 data &= ~MPLL_CLKOUT_SEL_MASK;
8361                                 data |= MPLL_CLKOUT_SEL(4);
8362                                 if (orig != data)
8363                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8364                         }
8365                 }
8366         } else {
8367                 if (orig != data)
8368                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8369         }
8370
8371         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8372         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8373         if (orig != data)
8374                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8375
8376         if (!disable_l0s) {
8377                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8378                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8379                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8380                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8381                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8382                                 data &= ~LC_L0S_INACTIVITY_MASK;
8383                                 if (orig != data)
8384                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8385                         }
8386                 }
8387         }
8388 }