/* drivers/gpu/drm/radeon/cik.c */
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
71                                  struct radeon_ib *ib,
72                                  uint64_t pe,
73                                  uint64_t addr, unsigned count,
74                                  uint32_t incr, uint32_t flags);
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
80
81 /* get temperature in millidegrees */
82 int ci_get_temp(struct radeon_device *rdev)
83 {
84         u32 temp;
85         int actual_temp = 0;
86
87         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
88                 CTF_TEMP_SHIFT;
89
90         if (temp & 0x200)
91                 actual_temp = 255;
92         else
93                 actual_temp = temp & 0x1ff;
94
95         actual_temp = actual_temp * 1000;
96
97         return actual_temp;
98 }
99
100 /* get temperature in millidegrees */
101 int kv_get_temp(struct radeon_device *rdev)
102 {
103         u32 temp;
104         int actual_temp = 0;
105
106         temp = RREG32_SMC(0xC0300E0C);
107
108         if (temp)
109                 actual_temp = (temp / 8) - 49;
110         else
111                 actual_temp = 0;
112
113         actual_temp = actual_temp * 1000;
114
115         return actual_temp;
116 }
117
118 /*
119  * Indirect registers accessor
120  */
121 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
122 {
123         u32 r;
124
125         WREG32(PCIE_INDEX, reg);
126         (void)RREG32(PCIE_INDEX);
127         r = RREG32(PCIE_DATA);
128         return r;
129 }
130
131 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
132 {
133         WREG32(PCIE_INDEX, reg);
134         (void)RREG32(PCIE_INDEX);
135         WREG32(PCIE_DATA, v);
136         (void)RREG32(PCIE_DATA);
137 }
138
139 static const u32 spectre_rlc_save_restore_register_list[] =
140 {
141         (0x0e00 << 16) | (0xc12c >> 2),
142         0x00000000,
143         (0x0e00 << 16) | (0xc140 >> 2),
144         0x00000000,
145         (0x0e00 << 16) | (0xc150 >> 2),
146         0x00000000,
147         (0x0e00 << 16) | (0xc15c >> 2),
148         0x00000000,
149         (0x0e00 << 16) | (0xc168 >> 2),
150         0x00000000,
151         (0x0e00 << 16) | (0xc170 >> 2),
152         0x00000000,
153         (0x0e00 << 16) | (0xc178 >> 2),
154         0x00000000,
155         (0x0e00 << 16) | (0xc204 >> 2),
156         0x00000000,
157         (0x0e00 << 16) | (0xc2b4 >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc2b8 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc2bc >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc2c0 >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0x8228 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0x829c >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0x869c >> 2),
170         0x00000000,
171         (0x0600 << 16) | (0x98f4 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0x98f8 >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0x9900 >> 2),
176         0x00000000,
177         (0x0e00 << 16) | (0xc260 >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0x90e8 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x3c000 >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0x3c00c >> 2),
184         0x00000000,
185         (0x0e00 << 16) | (0x8c1c >> 2),
186         0x00000000,
187         (0x0e00 << 16) | (0x9700 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0xcd20 >> 2),
190         0x00000000,
191         (0x4e00 << 16) | (0xcd20 >> 2),
192         0x00000000,
193         (0x5e00 << 16) | (0xcd20 >> 2),
194         0x00000000,
195         (0x6e00 << 16) | (0xcd20 >> 2),
196         0x00000000,
197         (0x7e00 << 16) | (0xcd20 >> 2),
198         0x00000000,
199         (0x8e00 << 16) | (0xcd20 >> 2),
200         0x00000000,
201         (0x9e00 << 16) | (0xcd20 >> 2),
202         0x00000000,
203         (0xae00 << 16) | (0xcd20 >> 2),
204         0x00000000,
205         (0xbe00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x0e00 << 16) | (0x89bc >> 2),
208         0x00000000,
209         (0x0e00 << 16) | (0x8900 >> 2),
210         0x00000000,
211         0x3,
212         (0x0e00 << 16) | (0xc130 >> 2),
213         0x00000000,
214         (0x0e00 << 16) | (0xc134 >> 2),
215         0x00000000,
216         (0x0e00 << 16) | (0xc1fc >> 2),
217         0x00000000,
218         (0x0e00 << 16) | (0xc208 >> 2),
219         0x00000000,
220         (0x0e00 << 16) | (0xc264 >> 2),
221         0x00000000,
222         (0x0e00 << 16) | (0xc268 >> 2),
223         0x00000000,
224         (0x0e00 << 16) | (0xc26c >> 2),
225         0x00000000,
226         (0x0e00 << 16) | (0xc270 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc274 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc278 >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc27c >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc280 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc284 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc288 >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc28c >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc290 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc294 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc298 >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc29c >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc2a0 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc2a4 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc2a8 >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc2ac  >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc2b0 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0x301d0 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0x30238 >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0x30250 >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0x30254 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0x30258 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0x3025c >> 2),
271         0x00000000,
272         (0x4e00 << 16) | (0xc900 >> 2),
273         0x00000000,
274         (0x5e00 << 16) | (0xc900 >> 2),
275         0x00000000,
276         (0x6e00 << 16) | (0xc900 >> 2),
277         0x00000000,
278         (0x7e00 << 16) | (0xc900 >> 2),
279         0x00000000,
280         (0x8e00 << 16) | (0xc900 >> 2),
281         0x00000000,
282         (0x9e00 << 16) | (0xc900 >> 2),
283         0x00000000,
284         (0xae00 << 16) | (0xc900 >> 2),
285         0x00000000,
286         (0xbe00 << 16) | (0xc900 >> 2),
287         0x00000000,
288         (0x4e00 << 16) | (0xc904 >> 2),
289         0x00000000,
290         (0x5e00 << 16) | (0xc904 >> 2),
291         0x00000000,
292         (0x6e00 << 16) | (0xc904 >> 2),
293         0x00000000,
294         (0x7e00 << 16) | (0xc904 >> 2),
295         0x00000000,
296         (0x8e00 << 16) | (0xc904 >> 2),
297         0x00000000,
298         (0x9e00 << 16) | (0xc904 >> 2),
299         0x00000000,
300         (0xae00 << 16) | (0xc904 >> 2),
301         0x00000000,
302         (0xbe00 << 16) | (0xc904 >> 2),
303         0x00000000,
304         (0x4e00 << 16) | (0xc908 >> 2),
305         0x00000000,
306         (0x5e00 << 16) | (0xc908 >> 2),
307         0x00000000,
308         (0x6e00 << 16) | (0xc908 >> 2),
309         0x00000000,
310         (0x7e00 << 16) | (0xc908 >> 2),
311         0x00000000,
312         (0x8e00 << 16) | (0xc908 >> 2),
313         0x00000000,
314         (0x9e00 << 16) | (0xc908 >> 2),
315         0x00000000,
316         (0xae00 << 16) | (0xc908 >> 2),
317         0x00000000,
318         (0xbe00 << 16) | (0xc908 >> 2),
319         0x00000000,
320         (0x4e00 << 16) | (0xc90c >> 2),
321         0x00000000,
322         (0x5e00 << 16) | (0xc90c >> 2),
323         0x00000000,
324         (0x6e00 << 16) | (0xc90c >> 2),
325         0x00000000,
326         (0x7e00 << 16) | (0xc90c >> 2),
327         0x00000000,
328         (0x8e00 << 16) | (0xc90c >> 2),
329         0x00000000,
330         (0x9e00 << 16) | (0xc90c >> 2),
331         0x00000000,
332         (0xae00 << 16) | (0xc90c >> 2),
333         0x00000000,
334         (0xbe00 << 16) | (0xc90c >> 2),
335         0x00000000,
336         (0x4e00 << 16) | (0xc910 >> 2),
337         0x00000000,
338         (0x5e00 << 16) | (0xc910 >> 2),
339         0x00000000,
340         (0x6e00 << 16) | (0xc910 >> 2),
341         0x00000000,
342         (0x7e00 << 16) | (0xc910 >> 2),
343         0x00000000,
344         (0x8e00 << 16) | (0xc910 >> 2),
345         0x00000000,
346         (0x9e00 << 16) | (0xc910 >> 2),
347         0x00000000,
348         (0xae00 << 16) | (0xc910 >> 2),
349         0x00000000,
350         (0xbe00 << 16) | (0xc910 >> 2),
351         0x00000000,
352         (0x0e00 << 16) | (0xc99c >> 2),
353         0x00000000,
354         (0x0e00 << 16) | (0x9834 >> 2),
355         0x00000000,
356         (0x0000 << 16) | (0x30f00 >> 2),
357         0x00000000,
358         (0x0001 << 16) | (0x30f00 >> 2),
359         0x00000000,
360         (0x0000 << 16) | (0x30f04 >> 2),
361         0x00000000,
362         (0x0001 << 16) | (0x30f04 >> 2),
363         0x00000000,
364         (0x0000 << 16) | (0x30f08 >> 2),
365         0x00000000,
366         (0x0001 << 16) | (0x30f08 >> 2),
367         0x00000000,
368         (0x0000 << 16) | (0x30f0c >> 2),
369         0x00000000,
370         (0x0001 << 16) | (0x30f0c >> 2),
371         0x00000000,
372         (0x0600 << 16) | (0x9b7c >> 2),
373         0x00000000,
374         (0x0e00 << 16) | (0x8a14 >> 2),
375         0x00000000,
376         (0x0e00 << 16) | (0x8a18 >> 2),
377         0x00000000,
378         (0x0600 << 16) | (0x30a00 >> 2),
379         0x00000000,
380         (0x0e00 << 16) | (0x8bf0 >> 2),
381         0x00000000,
382         (0x0e00 << 16) | (0x8bcc >> 2),
383         0x00000000,
384         (0x0e00 << 16) | (0x8b24 >> 2),
385         0x00000000,
386         (0x0e00 << 16) | (0x30a04 >> 2),
387         0x00000000,
388         (0x0600 << 16) | (0x30a10 >> 2),
389         0x00000000,
390         (0x0600 << 16) | (0x30a14 >> 2),
391         0x00000000,
392         (0x0600 << 16) | (0x30a18 >> 2),
393         0x00000000,
394         (0x0600 << 16) | (0x30a2c >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0xc700 >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0xc704 >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0xc708 >> 2),
401         0x00000000,
402         (0x0e00 << 16) | (0xc768 >> 2),
403         0x00000000,
404         (0x0400 << 16) | (0xc770 >> 2),
405         0x00000000,
406         (0x0400 << 16) | (0xc774 >> 2),
407         0x00000000,
408         (0x0400 << 16) | (0xc778 >> 2),
409         0x00000000,
410         (0x0400 << 16) | (0xc77c >> 2),
411         0x00000000,
412         (0x0400 << 16) | (0xc780 >> 2),
413         0x00000000,
414         (0x0400 << 16) | (0xc784 >> 2),
415         0x00000000,
416         (0x0400 << 16) | (0xc788 >> 2),
417         0x00000000,
418         (0x0400 << 16) | (0xc78c >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc798 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc79c >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc7a0 >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc7a4 >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc7a8 >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc7ac >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc7b0 >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc7b4 >> 2),
435         0x00000000,
436         (0x0e00 << 16) | (0x9100 >> 2),
437         0x00000000,
438         (0x0e00 << 16) | (0x3c010 >> 2),
439         0x00000000,
440         (0x0e00 << 16) | (0x92a8 >> 2),
441         0x00000000,
442         (0x0e00 << 16) | (0x92ac >> 2),
443         0x00000000,
444         (0x0e00 << 16) | (0x92b4 >> 2),
445         0x00000000,
446         (0x0e00 << 16) | (0x92b8 >> 2),
447         0x00000000,
448         (0x0e00 << 16) | (0x92bc >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x92c0 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x92c4 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x92c8 >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92cc >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92d0 >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x8c00 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x8c04 >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x8c20 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x8c38 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x8c3c >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0xae00 >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x9604 >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0xac08 >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0xac0c >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0xac10 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0xac14 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0xac58 >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0xac68 >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0xac6c >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0xac70 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac74 >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac78 >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac7c >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac80 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac84 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac88 >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac8c >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0x970c >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0x9714 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0x9718 >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0x971c >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0x31068 >> 2),
513         0x00000000,
514         (0x4e00 << 16) | (0x31068 >> 2),
515         0x00000000,
516         (0x5e00 << 16) | (0x31068 >> 2),
517         0x00000000,
518         (0x6e00 << 16) | (0x31068 >> 2),
519         0x00000000,
520         (0x7e00 << 16) | (0x31068 >> 2),
521         0x00000000,
522         (0x8e00 << 16) | (0x31068 >> 2),
523         0x00000000,
524         (0x9e00 << 16) | (0x31068 >> 2),
525         0x00000000,
526         (0xae00 << 16) | (0x31068 >> 2),
527         0x00000000,
528         (0xbe00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x0e00 << 16) | (0xcd10 >> 2),
531         0x00000000,
532         (0x0e00 << 16) | (0xcd14 >> 2),
533         0x00000000,
534         (0x0e00 << 16) | (0x88b0 >> 2),
535         0x00000000,
536         (0x0e00 << 16) | (0x88b4 >> 2),
537         0x00000000,
538         (0x0e00 << 16) | (0x88b8 >> 2),
539         0x00000000,
540         (0x0e00 << 16) | (0x88bc >> 2),
541         0x00000000,
542         (0x0400 << 16) | (0x89c0 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0x88c4 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0x88c8 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0x88d0 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88d4 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88d8 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x8980 >> 2),
555         0x00000000,
556         (0x0e00 << 16) | (0x30938 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0x3093c >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x30940 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x89a0 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x30900 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x30904 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x89b4 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x3c210 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x3c214 >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x3c218 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x8904 >> 2),
577         0x00000000,
578         0x5,
579         (0x0e00 << 16) | (0x8c28 >> 2),
580         (0x0e00 << 16) | (0x8c2c >> 2),
581         (0x0e00 << 16) | (0x8c30 >> 2),
582         (0x0e00 << 16) | (0x8c34 >> 2),
583         (0x0e00 << 16) | (0x9600 >> 2),
584 };
585
586 static const u32 kalindi_rlc_save_restore_register_list[] =
587 {
588         (0x0e00 << 16) | (0xc12c >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0xc140 >> 2),
591         0x00000000,
592         (0x0e00 << 16) | (0xc150 >> 2),
593         0x00000000,
594         (0x0e00 << 16) | (0xc15c >> 2),
595         0x00000000,
596         (0x0e00 << 16) | (0xc168 >> 2),
597         0x00000000,
598         (0x0e00 << 16) | (0xc170 >> 2),
599         0x00000000,
600         (0x0e00 << 16) | (0xc204 >> 2),
601         0x00000000,
602         (0x0e00 << 16) | (0xc2b4 >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xc2b8 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc2bc >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc2c0 >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0x8228 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0x829c >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0x869c >> 2),
615         0x00000000,
616         (0x0600 << 16) | (0x98f4 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0x98f8 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0x9900 >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xc260 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x90e8 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x3c000 >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x3c00c >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0x8c1c >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x9700 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0xcd20 >> 2),
635         0x00000000,
636         (0x4e00 << 16) | (0xcd20 >> 2),
637         0x00000000,
638         (0x5e00 << 16) | (0xcd20 >> 2),
639         0x00000000,
640         (0x6e00 << 16) | (0xcd20 >> 2),
641         0x00000000,
642         (0x7e00 << 16) | (0xcd20 >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x89bc >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0x8900 >> 2),
647         0x00000000,
648         0x3,
649         (0x0e00 << 16) | (0xc130 >> 2),
650         0x00000000,
651         (0x0e00 << 16) | (0xc134 >> 2),
652         0x00000000,
653         (0x0e00 << 16) | (0xc1fc >> 2),
654         0x00000000,
655         (0x0e00 << 16) | (0xc208 >> 2),
656         0x00000000,
657         (0x0e00 << 16) | (0xc264 >> 2),
658         0x00000000,
659         (0x0e00 << 16) | (0xc268 >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0xc26c >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0xc270 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc274 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc28c >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc290 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc294 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc298 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc2a0 >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc2a4 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc2a8 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc2ac >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0x301d0 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0x30238 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0x30250 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0x30254 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0x30258 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0x3025c >> 2),
694         0x00000000,
695         (0x4e00 << 16) | (0xc900 >> 2),
696         0x00000000,
697         (0x5e00 << 16) | (0xc900 >> 2),
698         0x00000000,
699         (0x6e00 << 16) | (0xc900 >> 2),
700         0x00000000,
701         (0x7e00 << 16) | (0xc900 >> 2),
702         0x00000000,
703         (0x4e00 << 16) | (0xc904 >> 2),
704         0x00000000,
705         (0x5e00 << 16) | (0xc904 >> 2),
706         0x00000000,
707         (0x6e00 << 16) | (0xc904 >> 2),
708         0x00000000,
709         (0x7e00 << 16) | (0xc904 >> 2),
710         0x00000000,
711         (0x4e00 << 16) | (0xc908 >> 2),
712         0x00000000,
713         (0x5e00 << 16) | (0xc908 >> 2),
714         0x00000000,
715         (0x6e00 << 16) | (0xc908 >> 2),
716         0x00000000,
717         (0x7e00 << 16) | (0xc908 >> 2),
718         0x00000000,
719         (0x4e00 << 16) | (0xc90c >> 2),
720         0x00000000,
721         (0x5e00 << 16) | (0xc90c >> 2),
722         0x00000000,
723         (0x6e00 << 16) | (0xc90c >> 2),
724         0x00000000,
725         (0x7e00 << 16) | (0xc90c >> 2),
726         0x00000000,
727         (0x4e00 << 16) | (0xc910 >> 2),
728         0x00000000,
729         (0x5e00 << 16) | (0xc910 >> 2),
730         0x00000000,
731         (0x6e00 << 16) | (0xc910 >> 2),
732         0x00000000,
733         (0x7e00 << 16) | (0xc910 >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0xc99c >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0x9834 >> 2),
738         0x00000000,
739         (0x0000 << 16) | (0x30f00 >> 2),
740         0x00000000,
741         (0x0000 << 16) | (0x30f04 >> 2),
742         0x00000000,
743         (0x0000 << 16) | (0x30f08 >> 2),
744         0x00000000,
745         (0x0000 << 16) | (0x30f0c >> 2),
746         0x00000000,
747         (0x0600 << 16) | (0x9b7c >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0x8a14 >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x8a18 >> 2),
752         0x00000000,
753         (0x0600 << 16) | (0x30a00 >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0x8bf0 >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0x8bcc >> 2),
758         0x00000000,
759         (0x0e00 << 16) | (0x8b24 >> 2),
760         0x00000000,
761         (0x0e00 << 16) | (0x30a04 >> 2),
762         0x00000000,
763         (0x0600 << 16) | (0x30a10 >> 2),
764         0x00000000,
765         (0x0600 << 16) | (0x30a14 >> 2),
766         0x00000000,
767         (0x0600 << 16) | (0x30a18 >> 2),
768         0x00000000,
769         (0x0600 << 16) | (0x30a2c >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0xc700 >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0xc704 >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0xc708 >> 2),
776         0x00000000,
777         (0x0e00 << 16) | (0xc768 >> 2),
778         0x00000000,
779         (0x0400 << 16) | (0xc770 >> 2),
780         0x00000000,
781         (0x0400 << 16) | (0xc774 >> 2),
782         0x00000000,
783         (0x0400 << 16) | (0xc798 >> 2),
784         0x00000000,
785         (0x0400 << 16) | (0xc79c >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0x9100 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0x3c010 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0x8c00 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0x8c04 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0x8c20 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0x8c38 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0x8c3c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xae00 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x9604 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xac08 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xac0c >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xac10 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xac14 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0xac58 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0xac68 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0xac6c >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0xac70 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac74 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac78 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac7c >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac80 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac84 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac88 >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac8c >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0x970c >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0x9714 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0x9718 >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0x971c >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0x31068 >> 2),
844         0x00000000,
845         (0x4e00 << 16) | (0x31068 >> 2),
846         0x00000000,
847         (0x5e00 << 16) | (0x31068 >> 2),
848         0x00000000,
849         (0x6e00 << 16) | (0x31068 >> 2),
850         0x00000000,
851         (0x7e00 << 16) | (0x31068 >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0xcd10 >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0xcd14 >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x88b0 >> 2),
858         0x00000000,
859         (0x0e00 << 16) | (0x88b4 >> 2),
860         0x00000000,
861         (0x0e00 << 16) | (0x88b8 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0x88bc >> 2),
864         0x00000000,
865         (0x0400 << 16) | (0x89c0 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x88c4 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x88c8 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0x88d0 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88d4 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88d8 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x8980 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x30938 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x3093c >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x30940 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x89a0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x30900 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x30904 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x89b4 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x3e1fc >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x3c210 >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x3c214 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x3c218 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x8904 >> 2),
902         0x00000000,
903         0x5,
904         (0x0e00 << 16) | (0x8c28 >> 2),
905         (0x0e00 << 16) | (0x8c2c >> 2),
906         (0x0e00 << 16) | (0x8c30 >> 2),
907         (0x0e00 << 16) | (0x8c34 >> 2),
908         (0x0e00 << 16) | (0x9600 >> 2),
909 };
910
911 static const u32 bonaire_golden_spm_registers[] =
912 {
913         0x30800, 0xe0ffffff, 0xe0000000
914 };
915
/* Bonaire common "golden" settings: {reg offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence() during init.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
923
/* Bonaire per-asic "golden" register fixups: {reg offset, and-mask,
 * or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
968
/* Bonaire medium-grain / coarse-grain clock-gating init sequence:
 * {reg offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().  Entries with an 0xffffffff mask
 * are full-register writes.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1054
/* Spectre (Kaveri) SPM "golden" setting: {reg offset, and-mask, or-value}
 * triple consumed by radeon_program_register_sequence() during init.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1059
/* Spectre (Kaveri) common "golden" settings: {reg offset, and-mask,
 * or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1067
/* Spectre (Kaveri) per-asic "golden" register fixups: {reg offset,
 * and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	/* NOTE(review): 0x28355 is not dword-aligned — possibly 0x28354
	 * was intended; confirm against the register spec before touching.
	 */
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1096
/* Spectre (Kaveri) medium-grain / coarse-grain clock-gating init
 * sequence: {reg offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1187
/* Kalindi (Kabini) SPM "golden" setting: {reg offset, and-mask, or-value}
 * triple consumed by radeon_program_register_sequence() during init.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1192
/* Kalindi (Kabini) common "golden" settings: {reg offset, and-mask,
 * or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1200
/* Kalindi (Kabini) per-asic "golden" register fixups: {reg offset,
 * and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1234
/* Kalindi (Kabini) medium-grain / coarse-grain clock-gating init
 * sequence: {reg offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1293
1294 static void cik_init_golden_registers(struct radeon_device *rdev)
1295 {
1296         switch (rdev->family) {
1297         case CHIP_BONAIRE:
1298                 radeon_program_register_sequence(rdev,
1299                                                  bonaire_mgcg_cgcg_init,
1300                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1301                 radeon_program_register_sequence(rdev,
1302                                                  bonaire_golden_registers,
1303                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1304                 radeon_program_register_sequence(rdev,
1305                                                  bonaire_golden_common_registers,
1306                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1307                 radeon_program_register_sequence(rdev,
1308                                                  bonaire_golden_spm_registers,
1309                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1310                 break;
1311         case CHIP_KABINI:
1312                 radeon_program_register_sequence(rdev,
1313                                                  kalindi_mgcg_cgcg_init,
1314                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1315                 radeon_program_register_sequence(rdev,
1316                                                  kalindi_golden_registers,
1317                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1318                 radeon_program_register_sequence(rdev,
1319                                                  kalindi_golden_common_registers,
1320                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1321                 radeon_program_register_sequence(rdev,
1322                                                  kalindi_golden_spm_registers,
1323                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1324                 break;
1325         case CHIP_KAVERI:
1326                 radeon_program_register_sequence(rdev,
1327                                                  spectre_mgcg_cgcg_init,
1328                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1329                 radeon_program_register_sequence(rdev,
1330                                                  spectre_golden_registers,
1331                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1332                 radeon_program_register_sequence(rdev,
1333                                                  spectre_golden_common_registers,
1334                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1335                 radeon_program_register_sequence(rdev,
1336                                                  spectre_golden_spm_registers,
1337                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1338                 break;
1339         default:
1340                 break;
1341         }
1342 }
1343
1344 /**
1345  * cik_get_xclk - get the xclk
1346  *
1347  * @rdev: radeon_device pointer
1348  *
1349  * Returns the reference clock used by the gfx engine
1350  * (CIK).
1351  */
1352 u32 cik_get_xclk(struct radeon_device *rdev)
1353 {
1354         u32 reference_clock = rdev->clock.spll.reference_freq;
1355
1356         if (rdev->flags & RADEON_IS_IGP) {
1357                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1358                         return reference_clock / 2;
1359         } else {
1360                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1361                         return reference_clock / 4;
1362         }
1363         return reference_clock;
1364 }
1365
1366 /**
1367  * cik_mm_rdoorbell - read a doorbell dword
1368  *
1369  * @rdev: radeon_device pointer
1370  * @offset: byte offset into the aperture
1371  *
1372  * Returns the value in the doorbell aperture at the
1373  * requested offset (CIK).
1374  */
1375 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1376 {
1377         if (offset < rdev->doorbell.size) {
1378                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1379         } else {
1380                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1381                 return 0;
1382         }
1383 }
1384
1385 /**
1386  * cik_mm_wdoorbell - write a doorbell dword
1387  *
1388  * @rdev: radeon_device pointer
1389  * @offset: byte offset into the aperture
1390  * @v: value to write
1391  *
1392  * Writes @v to the doorbell aperture at the
1393  * requested offset (CIK).
1394  */
1395 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1396 {
1397         if (offset < rdev->doorbell.size) {
1398                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1399         } else {
1400                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1401         }
1402 }
1403
1404 #define BONAIRE_IO_MC_REGS_SIZE 36
1405
/* Bonaire MC sequencer IO debug settings: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written by ci_mc_load_microcode() before
 * the MC ucode is uploaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1445
1446 /**
1447  * cik_srbm_select - select specific register instances
1448  *
1449  * @rdev: radeon_device pointer
1450  * @me: selected ME (micro engine)
1451  * @pipe: pipe
1452  * @queue: queue
1453  * @vmid: VMID
1454  *
1455  * Switches the currently active registers instances.  Some
1456  * registers are instanced per VMID, others are instanced per
1457  * me/pipe/queue combination.
1458  */
1459 static void cik_srbm_select(struct radeon_device *rdev,
1460                             u32 me, u32 pipe, u32 queue, u32 vmid)
1461 {
1462         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1463                              MEID(me & 0x3) |
1464                              VMID(vmid & 0xf) |
1465                              QUEUEID(queue & 0x7));
1466         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1467 }
1468
1469 /* ucode loading */
1470 /**
1471  * ci_mc_load_microcode - load MC ucode into the hw
1472  *
1473  * @rdev: radeon_device pointer
1474  *
1475  * Load the GDDR MC ucode into the hw (CIK).
1476  * Returns 0 on success, error on failure.
1477  */
1478 static int ci_mc_load_microcode(struct radeon_device *rdev)
1479 {
1480         const __be32 *fw_data;
1481         u32 running, blackout = 0;
1482         u32 *io_mc_regs;
1483         int i, ucode_size, regs_size;
1484
1485         if (!rdev->mc_fw)
1486                 return -EINVAL;
1487
1488         switch (rdev->family) {
1489         case CHIP_BONAIRE:
1490         default:
1491                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1492                 ucode_size = CIK_MC_UCODE_SIZE;
1493                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1494                 break;
1495         }
1496
1497         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1498
1499         if (running == 0) {
1500                 if (running) {
1501                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1502                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1503                 }
1504
1505                 /* reset the engine and set to writable */
1506                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1507                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1508
1509                 /* load mc io regs */
1510                 for (i = 0; i < regs_size; i++) {
1511                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1512                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1513                 }
1514                 /* load the MC ucode */
1515                 fw_data = (const __be32 *)rdev->mc_fw->data;
1516                 for (i = 0; i < ucode_size; i++)
1517                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1518
1519                 /* put the engine back into the active state */
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1521                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1522                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1523
1524                 /* wait for training to complete */
1525                 for (i = 0; i < rdev->usec_timeout; i++) {
1526                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1527                                 break;
1528                         udelay(1);
1529                 }
1530                 for (i = 0; i < rdev->usec_timeout; i++) {
1531                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1532                                 break;
1533                         udelay(1);
1534                 }
1535
1536                 if (running)
1537                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1538         }
1539
1540         return 0;
1541 }
1542
1543 /**
1544  * cik_init_microcode - load ucode images from disk
1545  *
1546  * @rdev: radeon_device pointer
1547  *
1548  * Use the firmware interface to load the ucode images into
1549  * the driver (not loaded into hw).
1550  * Returns 0 on success, error on failure.
1551  */
1552 static int cik_init_microcode(struct radeon_device *rdev)
1553 {
1554         const char *chip_name;
1555         size_t pfp_req_size, me_req_size, ce_req_size,
1556                 mec_req_size, rlc_req_size, mc_req_size,
1557                 sdma_req_size, smc_req_size;
1558         char fw_name[30];
1559         int err;
1560
1561         DRM_DEBUG("\n");
1562
1563         switch (rdev->family) {
1564         case CHIP_BONAIRE:
1565                 chip_name = "BONAIRE";
1566                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1567                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1568                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1569                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1570                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1571                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1572                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1573                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1574                 break;
1575         case CHIP_KAVERI:
1576                 chip_name = "KAVERI";
1577                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1582                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1583                 break;
1584         case CHIP_KABINI:
1585                 chip_name = "KABINI";
1586                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1588                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1591                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1592                 break;
1593         default: BUG();
1594         }
1595
1596         DRM_INFO("Loading %s Microcode\n", chip_name);
1597
1598         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1599         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1600         if (err)
1601                 goto out;
1602         if (rdev->pfp_fw->size != pfp_req_size) {
1603                 printk(KERN_ERR
1604                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1605                        rdev->pfp_fw->size, fw_name);
1606                 err = -EINVAL;
1607                 goto out;
1608         }
1609
1610         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1611         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1612         if (err)
1613                 goto out;
1614         if (rdev->me_fw->size != me_req_size) {
1615                 printk(KERN_ERR
1616                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1617                        rdev->me_fw->size, fw_name);
1618                 err = -EINVAL;
1619         }
1620
1621         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1622         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1623         if (err)
1624                 goto out;
1625         if (rdev->ce_fw->size != ce_req_size) {
1626                 printk(KERN_ERR
1627                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628                        rdev->ce_fw->size, fw_name);
1629                 err = -EINVAL;
1630         }
1631
1632         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1633         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1634         if (err)
1635                 goto out;
1636         if (rdev->mec_fw->size != mec_req_size) {
1637                 printk(KERN_ERR
1638                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639                        rdev->mec_fw->size, fw_name);
1640                 err = -EINVAL;
1641         }
1642
1643         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1644         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1645         if (err)
1646                 goto out;
1647         if (rdev->rlc_fw->size != rlc_req_size) {
1648                 printk(KERN_ERR
1649                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1650                        rdev->rlc_fw->size, fw_name);
1651                 err = -EINVAL;
1652         }
1653
1654         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1655         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1656         if (err)
1657                 goto out;
1658         if (rdev->sdma_fw->size != sdma_req_size) {
1659                 printk(KERN_ERR
1660                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1661                        rdev->sdma_fw->size, fw_name);
1662                 err = -EINVAL;
1663         }
1664
1665         /* No SMC, MC ucode on APUs */
1666         if (!(rdev->flags & RADEON_IS_IGP)) {
1667                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1668                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1669                 if (err)
1670                         goto out;
1671                 if (rdev->mc_fw->size != mc_req_size) {
1672                         printk(KERN_ERR
1673                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1674                                rdev->mc_fw->size, fw_name);
1675                         err = -EINVAL;
1676                 }
1677
1678                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1679                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1680                 if (err) {
1681                         printk(KERN_ERR
1682                                "smc: error loading firmware \"%s\"\n",
1683                                fw_name);
1684                         release_firmware(rdev->smc_fw);
1685                         rdev->smc_fw = NULL;
1686                 } else if (rdev->smc_fw->size != smc_req_size) {
1687                         printk(KERN_ERR
1688                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1689                                rdev->smc_fw->size, fw_name);
1690                         err = -EINVAL;
1691                 }
1692         }
1693
1694 out:
1695         if (err) {
1696                 if (err != -EINVAL)
1697                         printk(KERN_ERR
1698                                "cik_cp: Failed to load firmware \"%s\"\n",
1699                                fw_name);
1700                 release_firmware(rdev->pfp_fw);
1701                 rdev->pfp_fw = NULL;
1702                 release_firmware(rdev->me_fw);
1703                 rdev->me_fw = NULL;
1704                 release_firmware(rdev->ce_fw);
1705                 rdev->ce_fw = NULL;
1706                 release_firmware(rdev->rlc_fw);
1707                 rdev->rlc_fw = NULL;
1708                 release_firmware(rdev->mc_fw);
1709                 rdev->mc_fw = NULL;
1710                 release_firmware(rdev->smc_fw);
1711                 rdev->smc_fw = NULL;
1712         }
1713         return err;
1714 }
1715
1716 /*
1717  * Core functions
1718  */
1719 /**
1720  * cik_tiling_mode_table_init - init the hw tiling table
1721  *
1722  * @rdev: radeon_device pointer
1723  *
1724  * Starting with SI, the tiling setup is done globally in a
1725  * set of 32 tiling modes.  Rather than selecting each set of
1726  * parameters per surface as on older asics, we just select
1727  * which index in the tiling table we want to use, and the
1728  * surface uses those parameters (CIK follows the same scheme).
1729  */
1730 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1731 {
1732         const u32 num_tile_mode_states = 32;
1733         const u32 num_secondary_tile_mode_states = 16;
1734         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1735         u32 num_pipe_configs;
1736         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1737                 rdev->config.cik.max_shader_engines;
1738
1739         switch (rdev->config.cik.mem_row_size_in_kb) {
1740         case 1:
1741                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1742                 break;
1743         case 2:
1744         default:
1745                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1746                 break;
1747         case 4:
1748                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1749                 break;
1750         }
1751
1752         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1753         if (num_pipe_configs > 8)
1754                 num_pipe_configs = 8; /* ??? */
1755
1756         if (num_pipe_configs == 8) {
1757                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1758                         switch (reg_offset) {
1759                         case 0:
1760                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1761                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1762                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1763                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1764                                 break;
1765                         case 1:
1766                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1767                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1768                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1769                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1770                                 break;
1771                         case 2:
1772                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1776                                 break;
1777                         case 3:
1778                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1782                                 break;
1783                         case 4:
1784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787                                                  TILE_SPLIT(split_equal_to_row_size));
1788                                 break;
1789                         case 5:
1790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1792                                 break;
1793                         case 6:
1794                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1795                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1796                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1797                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1798                                 break;
1799                         case 7:
1800                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1801                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1802                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1803                                                  TILE_SPLIT(split_equal_to_row_size));
1804                                 break;
1805                         case 8:
1806                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1807                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1808                                 break;
1809                         case 9:
1810                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1811                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1812                                 break;
1813                         case 10:
1814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1815                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1816                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1817                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1818                                 break;
1819                         case 11:
1820                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1821                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1822                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1823                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1824                                 break;
1825                         case 12:
1826                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1827                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1830                                 break;
1831                         case 13:
1832                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1833                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1834                                 break;
1835                         case 14:
1836                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1837                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1838                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1839                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1840                                 break;
1841                         case 16:
1842                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1843                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1844                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1845                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1846                                 break;
1847                         case 17:
1848                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1849                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1852                                 break;
1853                         case 27:
1854                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1855                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1856                                 break;
1857                         case 28:
1858                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1860                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1861                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1862                                 break;
1863                         case 29:
1864                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1865                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1866                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1867                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1868                                 break;
1869                         case 30:
1870                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1871                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1874                                 break;
1875                         default:
1876                                 gb_tile_moden = 0;
1877                                 break;
1878                         }
1879                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1880                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1881                 }
1882                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1883                         switch (reg_offset) {
1884                         case 0:
1885                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1886                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1887                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1888                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1889                                 break;
1890                         case 1:
1891                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1892                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1893                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1894                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1895                                 break;
1896                         case 2:
1897                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1899                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1900                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1901                                 break;
1902                         case 3:
1903                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1905                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1907                                 break;
1908                         case 4:
1909                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1912                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1913                                 break;
1914                         case 5:
1915                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1918                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1919                                 break;
1920                         case 6:
1921                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1925                                 break;
1926                         case 8:
1927                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1929                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1930                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1931                                 break;
1932                         case 9:
1933                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1935                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1936                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1937                                 break;
1938                         case 10:
1939                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1941                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1942                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1943                                 break;
1944                         case 11:
1945                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1947                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1948                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1949                                 break;
1950                         case 12:
1951                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1953                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1954                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1955                                 break;
1956                         case 13:
1957                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1960                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1961                                 break;
1962                         case 14:
1963                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1967                                 break;
1968                         default:
1969                                 gb_tile_moden = 0;
1970                                 break;
1971                         }
1972                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1973                 }
1974         } else if (num_pipe_configs == 4) {
1975                 if (num_rbs == 4) {
1976                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1977                                 switch (reg_offset) {
1978                                 case 0:
1979                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1981                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1982                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1983                                         break;
1984                                 case 1:
1985                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1988                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1989                                         break;
1990                                 case 2:
1991                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1995                                         break;
1996                                 case 3:
1997                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2001                                         break;
2002                                 case 4:
2003                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006                                                          TILE_SPLIT(split_equal_to_row_size));
2007                                         break;
2008                                 case 5:
2009                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2011                                         break;
2012                                 case 6:
2013                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2014                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2016                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2017                                         break;
2018                                 case 7:
2019                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2022                                                          TILE_SPLIT(split_equal_to_row_size));
2023                                         break;
2024                                 case 8:
2025                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2026                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2027                                         break;
2028                                 case 9:
2029                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2030                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2031                                         break;
2032                                 case 10:
2033                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2035                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2036                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2037                                         break;
2038                                 case 11:
2039                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2040                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2042                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2043                                         break;
2044                                 case 12:
2045                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2046                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049                                         break;
2050                                 case 13:
2051                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2053                                         break;
2054                                 case 14:
2055                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2057                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2058                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2059                                         break;
2060                                 case 16:
2061                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2064                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2065                                         break;
2066                                 case 17:
2067                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2068                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                                         break;
2072                                 case 27:
2073                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2075                                         break;
2076                                 case 28:
2077                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2078                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2079                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2080                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081                                         break;
2082                                 case 29:
2083                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2086                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087                                         break;
2088                                 case 30:
2089                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                                         break;
2094                                 default:
2095                                         gb_tile_moden = 0;
2096                                         break;
2097                                 }
2098                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2099                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2100                         }
2101                 } else if (num_rbs < 4) {
2102                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2103                                 switch (reg_offset) {
2104                                 case 0:
2105                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2106                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2107                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2108                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2109                                         break;
2110                                 case 1:
2111                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2113                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2114                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2115                                         break;
2116                                 case 2:
2117                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2121                                         break;
2122                                 case 3:
2123                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2127                                         break;
2128                                 case 4:
2129                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132                                                          TILE_SPLIT(split_equal_to_row_size));
2133                                         break;
2134                                 case 5:
2135                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2136                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137                                         break;
2138                                 case 6:
2139                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2140                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2141                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2143                                         break;
2144                                 case 7:
2145                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2147                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2148                                                          TILE_SPLIT(split_equal_to_row_size));
2149                                         break;
2150                                 case 8:
2151                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2152                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2153                                         break;
2154                                 case 9:
2155                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2157                                         break;
2158                                 case 10:
2159                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2162                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163                                         break;
2164                                 case 11:
2165                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2166                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2168                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2169                                         break;
2170                                 case 12:
2171                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2172                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175                                         break;
2176                                 case 13:
2177                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2179                                         break;
2180                                 case 14:
2181                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2184                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185                                         break;
2186                                 case 16:
2187                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2190                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2191                                         break;
2192                                 case 17:
2193                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2194                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197                                         break;
2198                                 case 27:
2199                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2200                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2201                                         break;
2202                                 case 28:
2203                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2204                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2206                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207                                         break;
2208                                 case 29:
2209                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2212                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2213                                         break;
2214                                 case 30:
2215                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219                                         break;
2220                                 default:
2221                                         gb_tile_moden = 0;
2222                                         break;
2223                                 }
2224                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2225                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2226                         }
2227                 }
2228                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2229                         switch (reg_offset) {
2230                         case 0:
2231                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2235                                 break;
2236                         case 1:
2237                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2240                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2241                                 break;
2242                         case 2:
2243                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2247                                 break;
2248                         case 3:
2249                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2251                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2253                                 break;
2254                         case 4:
2255                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2259                                 break;
2260                         case 5:
2261                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2265                                 break;
2266                         case 6:
2267                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2271                                 break;
2272                         case 8:
2273                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2275                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2277                                 break;
2278                         case 9:
2279                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2283                                 break;
2284                         case 10:
2285                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2289                                 break;
2290                         case 11:
2291                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                                 break;
2296                         case 12:
2297                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2300                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2301                                 break;
2302                         case 13:
2303                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2307                                 break;
2308                         case 14:
2309                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2312                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2313                                 break;
2314                         default:
2315                                 gb_tile_moden = 0;
2316                                 break;
2317                         }
2318                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2319                 }
2320         } else if (num_pipe_configs == 2) {
2321                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2322                         switch (reg_offset) {
2323                         case 0:
2324                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2326                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2327                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2328                                 break;
2329                         case 1:
2330                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2333                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2334                                 break;
2335                         case 2:
2336                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2339                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2340                                 break;
2341                         case 3:
2342                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2345                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2346                                 break;
2347                         case 4:
2348                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2351                                                  TILE_SPLIT(split_equal_to_row_size));
2352                                 break;
2353                         case 5:
2354                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356                                 break;
2357                         case 6:
2358                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2359                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2360                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2361                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2362                                 break;
2363                         case 7:
2364                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2367                                                  TILE_SPLIT(split_equal_to_row_size));
2368                                 break;
2369                         case 8:
2370                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2371                                 break;
2372                         case 9:
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2375                                 break;
2376                         case 10:
2377                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2380                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                                 break;
2382                         case 11:
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2386                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387                                 break;
2388                         case 12:
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2392                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393                                 break;
2394                         case 13:
2395                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2396                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2397                                 break;
2398                         case 14:
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2402                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403                                 break;
2404                         case 16:
2405                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2408                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409                                 break;
2410                         case 17:
2411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2412                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2414                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                                 break;
2416                         case 27:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2419                                 break;
2420                         case 28:
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2424                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                                 break;
2426                         case 29:
2427                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2430                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                                 break;
2432                         case 30:
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2436                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                                 break;
2438                         default:
2439                                 gb_tile_moden = 0;
2440                                 break;
2441                         }
2442                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2443                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2444                 }
2445                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2446                         switch (reg_offset) {
2447                         case 0:
2448                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2451                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2452                                 break;
2453                         case 1:
2454                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2455                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2458                                 break;
2459                         case 2:
2460                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2464                                 break;
2465                         case 3:
2466                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2470                                 break;
2471                         case 4:
2472                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2476                                 break;
2477                         case 5:
2478                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2482                                 break;
2483                         case 6:
2484                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2488                                 break;
2489                         case 8:
2490                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2491                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2492                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2494                                 break;
2495                         case 9:
2496                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2497                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2499                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2500                                 break;
2501                         case 10:
2502                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2503                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2506                                 break;
2507                         case 11:
2508                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2512                                 break;
2513                         case 12:
2514                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2516                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2518                                 break;
2519                         case 13:
2520                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2524                                 break;
2525                         case 14:
2526                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2530                                 break;
2531                         default:
2532                                 gb_tile_moden = 0;
2533                                 break;
2534                         }
2535                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2536                 }
2537         } else
2538                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2539 }
2540
2541 /**
2542  * cik_select_se_sh - select which SE, SH to address
2543  *
2544  * @rdev: radeon_device pointer
2545  * @se_num: shader engine to address
2546  * @sh_num: sh block to address
2547  *
2548  * Select which SE, SH combinations to address. Certain
2549  * registers are instanced per SE or SH.  0xffffffff means
2550  * broadcast to all SEs or SHs (CIK).
2551  */
2552 static void cik_select_se_sh(struct radeon_device *rdev,
2553                              u32 se_num, u32 sh_num)
2554 {
2555         u32 data = INSTANCE_BROADCAST_WRITES;
2556
2557         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2558                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2559         else if (se_num == 0xffffffff)
2560                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2561         else if (sh_num == 0xffffffff)
2562                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2563         else
2564                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2565         WREG32(GRBM_GFX_INDEX, data);
2566 }
2567
2568 /**
2569  * cik_create_bitmask - create a bitmask
2570  *
2571  * @bit_width: length of the mask
2572  *
2573  * create a variable length bit mask (CIK).
2574  * Returns the bitmask.
2575  */
2576 static u32 cik_create_bitmask(u32 bit_width)
2577 {
2578         u32 i, mask = 0;
2579
2580         for (i = 0; i < bit_width; i++) {
2581                 mask <<= 1;
2582                 mask |= 1;
2583         }
2584         return mask;
2585 }
2586
/**
 * cik_get_rb_disabled - compute the bitmask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* RBs disabled by fuses (only valid when bit 0 is set) */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* merge in RBs disabled by the user/driver */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* keep only the bits covering the RBs of one SH
	 * NOTE(review): max_rb_num is divided by se_num * sh_per_se here,
	 * so the caller must pass a chip-wide RB count — verify callers.
	 */
	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
2617
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the disabled-RB bits of every SE/SH into one global mask,
	 * CIK_RB_BITMAP_WIDTH_PER_SH bits per SH
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast so later register writes hit all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a clear disabled bit means the RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program PA_SC_RASTER_CONFIG per SE, 2 bits of enabled_rbs
	 * consumed per SH
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2675
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader/cache topology limits and golden
	 * GB_ADDR_CONFIG value
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* CU and RB counts vary by Kaveri SKU (PCI device id) */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size from the MC column count */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* NOTE(review): the last argument is the per-SE backend count, but
	 * cik_setup_rb() treats it as a chip-wide RB count and divides by
	 * se_num * sh_per_se again — verify against the hardware docs.
	 */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan converter FIFO sizes come from the per-family table above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write to latch the host path config */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the hardware settle before first use */
	udelay(50);
}
2923
2924 /*
2925  * GPU scratch registers helpers function.
2926  */
2927 /**
2928  * cik_scratch_init - setup driver info for CP scratch regs
2929  *
2930  * @rdev: radeon_device pointer
2931  *
2932  * Set up the number and offset of the CP scratch registers.
2933  * NOTE: use of CP scratch registers is a legacy inferface and
2934  * is not used by default on newer asics (r6xx+).  On newer asics,
2935  * memory buffers are used for fences rather than scratch regs.
2936  */
2937 static void cik_scratch_init(struct radeon_device *rdev)
2938 {
2939         int i;
2940
2941         rdev->scratch.num_reg = 7;
2942         rdev->scratch.reg_base = SCRATCH_REG0;
2943         for (i = 0; i < rdev->scratch.num_reg; i++) {
2944                 rdev->scratch.free[i] = true;
2945                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2946         }
2947 }
2948
2949 /**
2950  * cik_ring_test - basic gfx ring test
2951  *
2952  * @rdev: radeon_device pointer
2953  * @ring: radeon_ring structure holding ring information
2954  *
2955  * Allocate a scratch register and write to it using the gfx ring (CIK).
2956  * Provides a basic gfx ring test to verify that the ring is working.
2957  * Used by cik_cp_gfx_resume();
2958  * Returns 0 on success, error on failure.
2959  */
2960 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2961 {
2962         uint32_t scratch;
2963         uint32_t tmp = 0;
2964         unsigned i;
2965         int r;
2966
2967         r = radeon_scratch_get(rdev, &scratch);
2968         if (r) {
2969                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2970                 return r;
2971         }
2972         WREG32(scratch, 0xCAFEDEAD);
2973         r = radeon_ring_lock(rdev, ring, 3);
2974         if (r) {
2975                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2976                 radeon_scratch_free(rdev, scratch);
2977                 return r;
2978         }
2979         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2980         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2981         radeon_ring_write(ring, 0xDEADBEEF);
2982         radeon_ring_unlock_commit(rdev, ring);
2983
2984         for (i = 0; i < rdev->usec_timeout; i++) {
2985                 tmp = RREG32(scratch);
2986                 if (tmp == 0xDEADBEEF)
2987                         break;
2988                 DRM_UDELAY(1);
2989         }
2990         if (i < rdev->usec_timeout) {
2991                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2992         } else {
2993                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2994                           ring->idx, scratch, tmp);
2995                 r = -EINVAL;
2996         }
2997         radeon_scratch_free(rdev, scratch);
2998         return r;
2999 }
3000
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* fence address must be dword aligned; low bits are dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3037
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* fence address must be dword aligned; low bits are dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3075
/**
 * cik_semaphore_ring_emit - emit a semaphore packet on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring to emit the packet on
 * @semaphore: radeon semaphore object
 * @emit_wait: emit a semaphore wait (true) or signal (false) packet
 *
 * Emits a MEM_SEMAPHORE packet carrying the semaphore GPU address
 * and the wait/signal selection (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* upper address bits share a dword with the wait/signal select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3088
/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* the next_rptr offsets (3 + 4, 5 + 4) must match the exact
		 * number of dwords emitted below, including the IB packet
		 */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* VM id 0 when the IB has no VM attached */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3147
3148 /**
3149  * cik_ib_test - basic gfx ring IB test
3150  *
3151  * @rdev: radeon_device pointer
3152  * @ring: radeon_ring structure holding ring information
3153  *
3154  * Allocate an IB and execute it on the gfx ring (CIK).
3155  * Provides a basic gfx ring test to verify that IBs are working.
3156  * Returns 0 on success, error on failure.
3157  */
3158 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3159 {
3160         struct radeon_ib ib;
3161         uint32_t scratch;
3162         uint32_t tmp = 0;
3163         unsigned i;
3164         int r;
3165
3166         r = radeon_scratch_get(rdev, &scratch);
3167         if (r) {
3168                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3169                 return r;
3170         }
3171         WREG32(scratch, 0xCAFEDEAD);
3172         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3173         if (r) {
3174                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3175                 return r;
3176         }
3177         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3178         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3179         ib.ptr[2] = 0xDEADBEEF;
3180         ib.length_dw = 3;
3181         r = radeon_ib_schedule(rdev, &ib, NULL);
3182         if (r) {
3183                 radeon_scratch_free(rdev, scratch);
3184                 radeon_ib_free(rdev, &ib);
3185                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3186                 return r;
3187         }
3188         r = radeon_fence_wait(ib.fence, false);
3189         if (r) {
3190                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3191                 return r;
3192         }
3193         for (i = 0; i < rdev->usec_timeout; i++) {
3194                 tmp = RREG32(scratch);
3195                 if (tmp == 0xDEADBEEF)
3196                         break;
3197                 DRM_UDELAY(1);
3198         }
3199         if (i < rdev->usec_timeout) {
3200                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3201         } else {
3202                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3203                           scratch, tmp);
3204                 r = -EINVAL;
3205         }
3206         radeon_scratch_free(rdev, scratch);
3207         radeon_ib_free(rdev, &ib);
3208         return r;
3209 }
3210
3211 /*
3212  * CP.
 * On CIK, gfx and compute now have independent command processors.
3214  *
3215  * GFX
3216  * Gfx consists of a single ring and can process both gfx jobs and
3217  * compute jobs.  The gfx CP consists of three microengines (ME):
3218  * PFP - Pre-Fetch Parser
3219  * ME - Micro Engine
3220  * CE - Constant Engine
3221  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3223  * used by the DE so that they can be loaded into cache in parallel
3224  * while the DE is processing state update packets.
3225  *
3226  * Compute
3227  * The compute CP consists of two microengines (ME):
3228  * MEC1 - Compute MicroEngine 1
3229  * MEC2 - Compute MicroEngine 2
3230  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3231  * The queues are exposed to userspace and are programmed directly
3232  * by the compute runtime.
3233  */
3234 /**
3235  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3236  *
3237  * @rdev: radeon_device pointer
3238  * @enable: enable or disable the MEs
3239  *
3240  * Halts or unhalts the gfx MEs.
3241  */
3242 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3243 {
3244         if (enable)
3245                 WREG32(CP_ME_CNTL, 0);
3246         else {
3247                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3248                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3249         }
3250         udelay(50);
3251 }
3252
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* MEs must be halted while their ucode RAM is written */
	cik_cp_gfx_enable(rdev, false);

	/* PFP: reset the write address, stream in the ucode words,
	 * then reset the address again
	 */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all ucode address registers zeroed */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3298
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the clear state plus 17 dwords of fixed packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden clear state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3359
3360 /**
3361  * cik_cp_gfx_fini - stop the gfx ring
3362  *
3363  * @rdev: radeon_device pointer
3364  *
3365  * Stop the gfx ring and tear down the driver ring
3366  * info.
3367  */
3368 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3369 {
3370         cik_cp_gfx_enable(rdev, false);
3371         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3372 }
3373
3374 /**
3375  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3376  *
3377  * @rdev: radeon_device pointer
3378  *
3379  * Program the location and size of the gfx ring buffer
3380  * and test it to make sure it's working.
3381  * Returns 0 for success, error for failure.
3382  */
3383 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3384 {
3385         struct radeon_ring *ring;
3386         u32 tmp;
3387         u32 rb_bufsz;
3388         u64 rb_addr;
3389         int r;
3390
3391         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3392         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3393
3394         /* Set the write pointer delay */
3395         WREG32(CP_RB_WPTR_DELAY, 0);
3396
3397         /* set the RB to use vmid 0 */
3398         WREG32(CP_RB_VMID, 0);
3399
3400         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3401
3402         /* ring 0 - compute and gfx */
3403         /* Set ring buffer size */
3404         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3405         rb_bufsz = order_base_2(ring->ring_size / 8);
3406         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3407 #ifdef __BIG_ENDIAN
3408         tmp |= BUF_SWAP_32BIT;
3409 #endif
3410         WREG32(CP_RB0_CNTL, tmp);
3411
3412         /* Initialize the ring buffer's read and write pointers */
3413         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3414         ring->wptr = 0;
3415         WREG32(CP_RB0_WPTR, ring->wptr);
3416
3417         /* set the wb address wether it's enabled or not */
3418         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3419         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3420
3421         /* scratch register shadowing is no longer supported */
3422         WREG32(SCRATCH_UMSK, 0);
3423
3424         if (!rdev->wb.enabled)
3425                 tmp |= RB_NO_UPDATE;
3426
3427         mdelay(1);
3428         WREG32(CP_RB0_CNTL, tmp);
3429
3430         rb_addr = ring->gpu_addr >> 8;
3431         WREG32(CP_RB0_BASE, rb_addr);
3432         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3433
3434         ring->rptr = RREG32(CP_RB0_RPTR);
3435
3436         /* start the ring */
3437         cik_cp_gfx_start(rdev);
3438         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3439         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3440         if (r) {
3441                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3442                 return r;
3443         }
3444         return 0;
3445 }
3446
3447 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3448                               struct radeon_ring *ring)
3449 {
3450         u32 rptr;
3451
3452
3453
3454         if (rdev->wb.enabled) {
3455                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3456         } else {
3457                 mutex_lock(&rdev->srbm_mutex);
3458                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3459                 rptr = RREG32(CP_HQD_PQ_RPTR);
3460                 cik_srbm_select(rdev, 0, 0, 0, 0);
3461                 mutex_unlock(&rdev->srbm_mutex);
3462         }
3463
3464         return rptr;
3465 }
3466
3467 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3468                               struct radeon_ring *ring)
3469 {
3470         u32 wptr;
3471
3472         if (rdev->wb.enabled) {
3473                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3474         } else {
3475                 mutex_lock(&rdev->srbm_mutex);
3476                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3477                 wptr = RREG32(CP_HQD_PQ_WPTR);
3478                 cik_srbm_select(rdev, 0, 0, 0, 0);
3479                 mutex_unlock(&rdev->srbm_mutex);
3480         }
3481
3482         return wptr;
3483 }
3484
/* Publish the new compute write pointer: mirror it into the writeback
 * buffer, then ring the queue's doorbell to notify the MEC.
 */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3491
3492 /**
3493  * cik_cp_compute_enable - enable/disable the compute CP MEs
3494  *
3495  * @rdev: radeon_device pointer
3496  * @enable: enable or disable the MEs
3497  *
3498  * Halts or unhalts the compute MEs.
3499  */
3500 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3501 {
3502         if (enable)
3503                 WREG32(CP_MEC_CNTL, 0);
3504         else
3505                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3506         udelay(50);
3507 }
3508
3509 /**
3510  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3511  *
3512  * @rdev: radeon_device pointer
3513  *
3514  * Loads the compute MEC1&2 ucode.
3515  * Returns 0 for success, -EINVAL if the ucode is not available.
3516  */
3517 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3518 {
3519         const __be32 *fw_data;
3520         int i;
3521
3522         if (!rdev->mec_fw)
3523                 return -EINVAL;
3524
3525         cik_cp_compute_enable(rdev, false);
3526
3527         /* MEC1 */
3528         fw_data = (const __be32 *)rdev->mec_fw->data;
3529         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3530         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3531                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3532         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3533
3534         if (rdev->family == CHIP_KAVERI) {
3535                 /* MEC2 */
3536                 fw_data = (const __be32 *)rdev->mec_fw->data;
3537                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3538                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3539                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3540                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3541         }
3542
3543         return 0;
3544 }
3545
3546 /**
3547  * cik_cp_compute_start - start the compute queues
3548  *
3549  * @rdev: radeon_device pointer
3550  *
3551  * Enable the compute queues.
3552  * Returns 0 for success, error for failure.
3553  */
3554 static int cik_cp_compute_start(struct radeon_device *rdev)
3555 {
3556         cik_cp_compute_enable(rdev, true);
3557
3558         return 0;
3559 }
3560
3561 /**
3562  * cik_cp_compute_fini - stop the compute queues
3563  *
3564  * @rdev: radeon_device pointer
3565  *
3566  * Stop the compute queues and tear down the driver queue
3567  * info.
3568  */
3569 static void cik_cp_compute_fini(struct radeon_device *rdev)
3570 {
3571         int i, idx, r;
3572
3573         cik_cp_compute_enable(rdev, false);
3574
3575         for (i = 0; i < 2; i++) {
3576                 if (i == 0)
3577                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3578                 else
3579                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3580
3581                 if (rdev->ring[idx].mqd_obj) {
3582                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3583                         if (unlikely(r != 0))
3584                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3585
3586                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3587                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3588
3589                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3590                         rdev->ring[idx].mqd_obj = NULL;
3591                 }
3592         }
3593 }
3594
3595 static void cik_mec_fini(struct radeon_device *rdev)
3596 {
3597         int r;
3598
3599         if (rdev->mec.hpd_eop_obj) {
3600                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3601                 if (unlikely(r != 0))
3602                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3603                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3604                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3605
3606                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3607                 rdev->mec.hpd_eop_obj = NULL;
3608         }
3609 }
3610
3611 #define MEC_HPD_SIZE 2048
3612
3613 static int cik_mec_init(struct radeon_device *rdev)
3614 {
3615         int r;
3616         u32 *hpd;
3617
3618         /*
3619          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3620          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3621          */
3622         if (rdev->family == CHIP_KAVERI)
3623                 rdev->mec.num_mec = 2;
3624         else
3625                 rdev->mec.num_mec = 1;
3626         rdev->mec.num_pipe = 4;
3627         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3628
3629         if (rdev->mec.hpd_eop_obj == NULL) {
3630                 r = radeon_bo_create(rdev,
3631                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3632                                      PAGE_SIZE, true,
3633                                      RADEON_GEM_DOMAIN_GTT, NULL,
3634                                      &rdev->mec.hpd_eop_obj);
3635                 if (r) {
3636                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3637                         return r;
3638                 }
3639         }
3640
3641         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3642         if (unlikely(r != 0)) {
3643                 cik_mec_fini(rdev);
3644                 return r;
3645         }
3646         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3647                           &rdev->mec.hpd_eop_gpu_addr);
3648         if (r) {
3649                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3650                 cik_mec_fini(rdev);
3651                 return r;
3652         }
3653         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3654         if (r) {
3655                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3656                 cik_mec_fini(rdev);
3657                 return r;
3658         }
3659
3660         /* clear memory.  Not sure if this is required or not */
3661         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3662
3663         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3664         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3665
3666         return 0;
3667 }
3668
/* CPU-side shadow of the per-queue HQD register block, embedded in the
 * MQD so the hardware can save/restore queue state.  The field order
 * mirrors the hardware register layout — do not reorder.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3707
/* Memory Queue Descriptor for Bonaire-class (CIK) compute queues.
 * Lives in a GPU-visible BO; cik_cp_compute_resume() fills it in and
 * points CP_MQD_BASE_ADDR at it.  Layout is defined by the hardware —
 * do not reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* per-queue HQD register shadow */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3735
3736 /**
3737  * cik_cp_compute_resume - setup the compute queue registers
3738  *
3739  * @rdev: radeon_device pointer
3740  *
3741  * Program the compute queues and test them to make sure they
3742  * are working.
3743  * Returns 0 for success, error for failure.
3744  */
3745 static int cik_cp_compute_resume(struct radeon_device *rdev)
3746 {
3747         int r, i, idx;
3748         u32 tmp;
3749         bool use_doorbell = true;
3750         u64 hqd_gpu_addr;
3751         u64 mqd_gpu_addr;
3752         u64 eop_gpu_addr;
3753         u64 wb_gpu_addr;
3754         u32 *buf;
3755         struct bonaire_mqd *mqd;
3756
3757         r = cik_cp_compute_start(rdev);
3758         if (r)
3759                 return r;
3760
3761         /* fix up chicken bits */
3762         tmp = RREG32(CP_CPF_DEBUG);
3763         tmp |= (1 << 23);
3764         WREG32(CP_CPF_DEBUG, tmp);
3765
3766         /* init the pipes */
3767         mutex_lock(&rdev->srbm_mutex);
3768         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3769                 int me = (i < 4) ? 1 : 2;
3770                 int pipe = (i < 4) ? i : (i - 4);
3771
3772                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3773
3774                 cik_srbm_select(rdev, me, pipe, 0, 0);
3775
3776                 /* write the EOP addr */
3777                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3778                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3779
3780                 /* set the VMID assigned */
3781                 WREG32(CP_HPD_EOP_VMID, 0);
3782
3783                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3784                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3785                 tmp &= ~EOP_SIZE_MASK;
3786                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3787                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3788         }
3789         cik_srbm_select(rdev, 0, 0, 0, 0);
3790         mutex_unlock(&rdev->srbm_mutex);
3791
3792         /* init the queues.  Just two for now. */
3793         for (i = 0; i < 2; i++) {
3794                 if (i == 0)
3795                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3796                 else
3797                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3798
3799                 if (rdev->ring[idx].mqd_obj == NULL) {
3800                         r = radeon_bo_create(rdev,
3801                                              sizeof(struct bonaire_mqd),
3802                                              PAGE_SIZE, true,
3803                                              RADEON_GEM_DOMAIN_GTT, NULL,
3804                                              &rdev->ring[idx].mqd_obj);
3805                         if (r) {
3806                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3807                                 return r;
3808                         }
3809                 }
3810
3811                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3812                 if (unlikely(r != 0)) {
3813                         cik_cp_compute_fini(rdev);
3814                         return r;
3815                 }
3816                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3817                                   &mqd_gpu_addr);
3818                 if (r) {
3819                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3820                         cik_cp_compute_fini(rdev);
3821                         return r;
3822                 }
3823                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3824                 if (r) {
3825                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3826                         cik_cp_compute_fini(rdev);
3827                         return r;
3828                 }
3829
3830                 /* doorbell offset */
3831                 rdev->ring[idx].doorbell_offset =
3832                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3833
3834                 /* init the mqd struct */
3835                 memset(buf, 0, sizeof(struct bonaire_mqd));
3836
3837                 mqd = (struct bonaire_mqd *)buf;
3838                 mqd->header = 0xC0310800;
3839                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3840                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3841                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3842                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3843
3844                 mutex_lock(&rdev->srbm_mutex);
3845                 cik_srbm_select(rdev, rdev->ring[idx].me,
3846                                 rdev->ring[idx].pipe,
3847                                 rdev->ring[idx].queue, 0);
3848
3849                 /* disable wptr polling */
3850                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3851                 tmp &= ~WPTR_POLL_EN;
3852                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3853
3854                 /* enable doorbell? */
3855                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3856                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3857                 if (use_doorbell)
3858                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3859                 else
3860                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3861                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3862                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3863
3864                 /* disable the queue if it's active */
3865                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3866                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3867                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3868                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3869                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3870                         for (i = 0; i < rdev->usec_timeout; i++) {
3871                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3872                                         break;
3873                                 udelay(1);
3874                         }
3875                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3876                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3877                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3878                 }
3879
3880                 /* set the pointer to the MQD */
3881                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3882                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3883                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3884                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3885                 /* set MQD vmid to 0 */
3886                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3887                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3888                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3889
3890                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3891                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3892                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3893                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3894                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3895                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3896
3897                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3898                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3899                 mqd->queue_state.cp_hqd_pq_control &=
3900                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3901
3902                 mqd->queue_state.cp_hqd_pq_control |=
3903                         order_base_2(rdev->ring[idx].ring_size / 8);
3904                 mqd->queue_state.cp_hqd_pq_control |=
3905                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3906 #ifdef __BIG_ENDIAN
3907                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3908 #endif
3909                 mqd->queue_state.cp_hqd_pq_control &=
3910                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3911                 mqd->queue_state.cp_hqd_pq_control |=
3912                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3913                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3914
3915                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3916                 if (i == 0)
3917                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3918                 else
3919                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3920                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3921                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3922                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3923                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3924                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3925
3926                 /* set the wb address wether it's enabled or not */
3927                 if (i == 0)
3928                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3929                 else
3930                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3931                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3932                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3933                         upper_32_bits(wb_gpu_addr) & 0xffff;
3934                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3935                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3936                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3937                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3938
3939                 /* enable the doorbell if requested */
3940                 if (use_doorbell) {
3941                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3942                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3943                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3944                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3945                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3946                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3947                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3948                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3949
3950                 } else {
3951                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3952                 }
3953                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3954                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3955
3956                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3957                 rdev->ring[idx].wptr = 0;
3958                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3959                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3960                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3961                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3962
3963                 /* set the vmid for the queue */
3964                 mqd->queue_state.cp_hqd_vmid = 0;
3965                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3966
3967                 /* activate the queue */
3968                 mqd->queue_state.cp_hqd_active = 1;
3969                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3970
3971                 cik_srbm_select(rdev, 0, 0, 0, 0);
3972                 mutex_unlock(&rdev->srbm_mutex);
3973
3974                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3975                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3976
3977                 rdev->ring[idx].ready = true;
3978                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3979                 if (r)
3980                         rdev->ring[idx].ready = false;
3981         }
3982
3983         return 0;
3984 }
3985
/* Enable or halt both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
3991
/* Load the gfx (PFP/ME/CE) and compute (MEC) CP microcode.
 * Returns 0 on success, a negative error code on the first failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4005
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4011
/* Bring up both command processors: load the microcode, then resume
 * the gfx ring followed by the compute queues.
 * Returns 0 on success, a negative error code on the first failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;

	return cik_cp_compute_resume(rdev);
}
4029
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used when diagnosing GPU hangs before a soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4069
4070 /**
4071  * cik_gpu_check_soft_reset - check which blocks are busy
4072  *
4073  * @rdev: radeon_device pointer
4074  *
4075  * Check which blocks are busy and return the relevant reset
4076  * mask to be used by cik_gpu_soft_reset().
4077  * Returns a mask of the blocks to be reset.
4078  */
4079 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4080 {
4081         u32 reset_mask = 0;
4082         u32 tmp;
4083
4084         /* GRBM_STATUS */
4085         tmp = RREG32(GRBM_STATUS);
4086         if (tmp & (PA_BUSY | SC_BUSY |
4087                    BCI_BUSY | SX_BUSY |
4088                    TA_BUSY | VGT_BUSY |
4089                    DB_BUSY | CB_BUSY |
4090                    GDS_BUSY | SPI_BUSY |
4091                    IA_BUSY | IA_BUSY_NO_DMA))
4092                 reset_mask |= RADEON_RESET_GFX;
4093
4094         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4095                 reset_mask |= RADEON_RESET_CP;
4096
4097         /* GRBM_STATUS2 */
4098         tmp = RREG32(GRBM_STATUS2);
4099         if (tmp & RLC_BUSY)
4100                 reset_mask |= RADEON_RESET_RLC;
4101
4102         /* SDMA0_STATUS_REG */
4103         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4104         if (!(tmp & SDMA_IDLE))
4105                 reset_mask |= RADEON_RESET_DMA;
4106
4107         /* SDMA1_STATUS_REG */
4108         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4109         if (!(tmp & SDMA_IDLE))
4110                 reset_mask |= RADEON_RESET_DMA1;
4111
4112         /* SRBM_STATUS2 */
4113         tmp = RREG32(SRBM_STATUS2);
4114         if (tmp & SDMA_BUSY)
4115                 reset_mask |= RADEON_RESET_DMA;
4116
4117         if (tmp & SDMA1_BUSY)
4118                 reset_mask |= RADEON_RESET_DMA1;
4119
4120         /* SRBM_STATUS */
4121         tmp = RREG32(SRBM_STATUS);
4122
4123         if (tmp & IH_BUSY)
4124                 reset_mask |= RADEON_RESET_IH;
4125
4126         if (tmp & SEM_BUSY)
4127                 reset_mask |= RADEON_RESET_SEM;
4128
4129         if (tmp & GRBM_RQ_PENDING)
4130                 reset_mask |= RADEON_RESET_GRBM;
4131
4132         if (tmp & VMC_BUSY)
4133                 reset_mask |= RADEON_RESET_VMC;
4134
4135         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4136                    MCC_BUSY | MCD_BUSY))
4137                 reset_mask |= RADEON_RESET_MC;
4138
4139         if (evergreen_is_display_hung(rdev))
4140                 reset_mask |= RADEON_RESET_DISPLAY;
4141
4142         /* Skip MC reset as it's mostly likely not hung, just busy */
4143         if (reset_mask & RADEON_RESET_MC) {
4144                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4145                 reset_mask &= ~RADEON_RESET_MC;
4146         }
4147
4148         return reset_mask;
4149 }
4150
4151 /**
4152  * cik_gpu_soft_reset - soft reset GPU
4153  *
4154  * @rdev: radeon_device pointer
4155  * @reset_mask: mask of which blocks to reset
4156  *
4157  * Soft reset the blocks specified in @reset_mask.
4158  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing hung, nothing to do */
	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status before touching anything, for post-mortem debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the SDMA engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before pulsing the reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the block mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		/* SOFT_RESET_CP is already set by the branch above (its test
		 * includes RADEON_RESET_CP); the GRBM reset is the addition. */
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC reset is only valid on discrete parts, never on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse: assert, read back to post the write, delay, deassert */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump status again so before/after can be compared in the log */
	cik_print_gpu_status_regs(rdev);
}
4277
4278 /**
4279  * cik_asic_reset - soft reset GPU
4280  *
4281  * @rdev: radeon_device pointer
4282  *
4283  * Look up which blocks are hung and attempt
4284  * to reset them.
4285  * Returns 0 for success.
4286  */
4287 int cik_asic_reset(struct radeon_device *rdev)
4288 {
4289         u32 reset_mask;
4290
4291         reset_mask = cik_gpu_check_soft_reset(rdev);
4292
4293         if (reset_mask)
4294                 r600_set_bios_scratch_engine_hung(rdev, true);
4295
4296         cik_gpu_soft_reset(rdev, reset_mask);
4297
4298         reset_mask = cik_gpu_check_soft_reset(rdev);
4299
4300         if (!reset_mask)
4301                 r600_set_bios_scratch_engine_hung(rdev, false);
4302
4303         return 0;
4304 }
4305
4306 /**
4307  * cik_gfx_is_lockup - check if the 3D engine is locked up
4308  *
4309  * @rdev: radeon_device pointer
4310  * @ring: radeon_ring structure holding ring information
4311  *
4312  * Check if the 3D engine is locked up (CIK).
4313  * Returns true if the engine is locked, false if not.
4314  */
4315 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4316 {
4317         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4318
4319         if (!(reset_mask & (RADEON_RESET_GFX |
4320                             RADEON_RESET_COMPUTE |
4321                             RADEON_RESET_CP))) {
4322                 radeon_ring_lockup_update(ring);
4323                 return false;
4324         }
4325         /* force CP activities */
4326         radeon_ring_force_activity(rdev, ring);
4327         return radeon_ring_test_lockup(rdev, ring);
4328 }
4329
4330 /* MC */
4331 /**
4332  * cik_mc_program - program the GPU memory controller
4333  *
4334  * @rdev: radeon_device pointer
4335  *
4336  * Set the location of vram, gart, and AGP in the GPU's
4337  * physical address space (CIK).
4338  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: clear 32 register groups of 5 regs, 0x18 apart */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* idle the MC before reprogramming the aperture registers */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (end >> 24) in the high half,
	 * base (start >> 24) in the low half */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable AGP by programming an empty (inverted) aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
4386
4387 /**
4388  * cik_mc_init - initialize the memory controller driver params
4389  *
4390  * @rdev: radeon_device pointer
4391  *
4392  * Look up the amount of vram, vram width, and decide how to place
4393  * vram and gart within the GPU's physical address space (CIK).
4394  * Returns 0 for success.
4395  */
4396 static int cik_mc_init(struct radeon_device *rdev)
4397 {
4398         u32 tmp;
4399         int chansize, numchan;
4400
4401         /* Get VRAM informations */
4402         rdev->mc.vram_is_ddr = true;
4403         tmp = RREG32(MC_ARB_RAMCFG);
4404         if (tmp & CHANSIZE_MASK) {
4405                 chansize = 64;
4406         } else {
4407                 chansize = 32;
4408         }
4409         tmp = RREG32(MC_SHARED_CHMAP);
4410         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4411         case 0:
4412         default:
4413                 numchan = 1;
4414                 break;
4415         case 1:
4416                 numchan = 2;
4417                 break;
4418         case 2:
4419                 numchan = 4;
4420                 break;
4421         case 3:
4422                 numchan = 8;
4423                 break;
4424         case 4:
4425                 numchan = 3;
4426                 break;
4427         case 5:
4428                 numchan = 6;
4429                 break;
4430         case 6:
4431                 numchan = 10;
4432                 break;
4433         case 7:
4434                 numchan = 12;
4435                 break;
4436         case 8:
4437                 numchan = 16;
4438                 break;
4439         }
4440         rdev->mc.vram_width = numchan * chansize;
4441         /* Could aper size report 0 ? */
4442         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4443         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4444         /* size in MB on si */
4445         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4446         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4447         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4448         si_vram_gtt_location(rdev, &rdev->mc);
4449         radeon_update_bandwidth_info(rdev);
4450
4451         return 0;
4452 }
4453
4454 /*
4455  * GART
4456  * VMID 0 is the physical GPU addresses as used by the kernel.
4457  * VMIDs 1-15 are used for userspace clients and are handled
4458  * by the radeon vm/hsa code.
4459  */
4460 /**
4461  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4462  *
4463  * @rdev: radeon_device pointer
4464  *
4465  * Flush the TLB for the VMID 0 page table (CIK).
4466  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 invalidates VMID 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
4475
4476 /**
4477  * cik_pcie_gart_enable - gart enable
4478  *
4479  * @rdev: radeon_device pointer
4480  *
4481  * This sets up the TLBs, programs the page tables for VMID0,
4482  * sets up the hw for VMIDs 1-15 which are allocated on
4483  * demand, and sets up the global locations for the LDS, GDS,
4484  * and GPUVM for FSA64 clients (CIK).
4485  * Returns 0 for success, errors for failure.
4486  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the page table in VRAM and repopulate its entries */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's GART range, single-level page table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, cleared here — purpose not
	 * visible from this file */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 use two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15: two-level page tables, faults raise
	 * interrupts and fall back to the dummy page */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	/* Kaveri APU: make sure the VM path is not bypassed */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* select each VMID in turn; SRBM accesses are per-VMID */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* restore VMID 0 selection before releasing the SRBM lock */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4611
4612 /**
4613  * cik_pcie_gart_disable - gart disable
4614  *
4615  * @rdev: radeon_device pointer
4616  *
4617  * This disables all VM page table (CIK).
4618  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: same flags as enable, minus ENABLE_L1_TLB and
	 * the advanced driver model */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: same config as enable, minus ENABLE_L2_CACHE */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* page table can be unpinned only after the hw stopped using it */
	radeon_gart_table_vram_unpin(rdev);
}
4639
4640 /**
4641  * cik_pcie_gart_fini - vm fini callback
4642  *
4643  * @rdev: radeon_device pointer
4644  *
4645  * Tears down the driver GART/VM setup (CIK).
4646  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* teardown order matters: disable the hw first, then free the
	 * page table memory, then the driver-side GART bookkeeping */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4653
4654 /* vm parser */
4655 /**
4656  * cik_ib_parse - vm ib_parse callback
4657  *
4658  * @rdev: radeon_device pointer
4659  * @ib: indirect buffer pointer
4660  *
4661  * CIK uses hw IB checking so this is a nop (CIK).
4662  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* CIK relies on hardware IB validation, so software parsing
	 * is intentionally a no-op; always report success */
	return 0;
}
4667
4668 /*
4669  * vm
4670  * VMID 0 is the physical GPU addresses as used by the kernel.
4671  * VMIDs 1-15 are used for userspace clients and are handled
4672  * by the radeon vm/hsa code.
4673  */
4674 /**
4675  * cik_vm_init - cik vm init callback
4676  *
4677  * @rdev: radeon_device pointer
4678  *
4679  * Inits cik specific vm parameters (number of VMs, base of vram for
4680  * VMIDs 1-15) (CIK).
4681  * Returns 0 for success.
4682  */
4683 int cik_vm_init(struct radeon_device *rdev)
4684 {
4685         /* number of VMs */
4686         rdev->vm_manager.nvm = 16;
4687         /* base offset of vram pages */
4688         if (rdev->flags & RADEON_IS_IGP) {
4689                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4690                 tmp <<= 22;
4691                 rdev->vm_manager.vram_base_offset = tmp;
4692         } else
4693                 rdev->vm_manager.vram_base_offset = 0;
4694
4695         return 0;
4696 }
4697
4698 /**
4699  * cik_vm_fini - cik vm fini callback
4700  *
4701  * @rdev: radeon_device pointer
4702  *
4703  * Tear down any asic specific VM setup (CIK).
4704  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing asic-specific to tear down on CIK */
}
4708
4709 /**
4710  * cik_vm_decode_fault - print human readable fault info
4711  *
4712  * @rdev: radeon_device pointer
4713  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4714  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4715  *
4716  * Print human readable fault information (CIK).
4717  */
4718 static void cik_vm_decode_fault(struct radeon_device *rdev,
4719                                 u32 status, u32 addr, u32 mc_client)
4720 {
4721         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4722         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4723         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4724         char *block = (char *)&mc_client;
4725
4726         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4727                protections, vmid, addr,
4728                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4729                block, mc_id);
4730 }
4731
4732 /**
4733  * cik_vm_flush - cik vm flush using the CP
4734  *
4735  * @rdev: radeon_device pointer
4736  *
4737  * Update the page table base and flush the VM TLB
4738  * using the CP (CIK).
4739  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point this VMID's page table base at the new directory */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 use two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the VM's VMID via SRBM_GFX_CNTL before touching SH_MEM_* */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	/* write the four consecutive SH_MEM registers in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
4813
4814 /**
4815  * cik_vm_set_page - update the page tables using sDMA
4816  *
4817  * @rdev: radeon_device pointer
4818  * @ib: indirect buffer to fill with commands
4819  * @pe: addr of the page entry
4820  * @addr: dst addr to write into pe
4821  * @count: number of page entries to update
4822  * @incr: increase next addr by incr bytes
4823  * @flags: access flags
4824  *
4825  * Update the page tables using CP or sDMA (CIK).
4826  */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	/* pick the engine based on which ring owns page-table updates */
	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			/* 2 header dwords + 2 dwords per 64-bit PTE,
			 * capped at the WRITE_DATA packet size limit */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			/* emit one 64-bit entry per pair of dwords */
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					/* invalid entry: address field zero */
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
4869
4870 /*
4871  * RLC
4872  * The RLC is a multi-purpose microengine that handles a
4873  * variety of functions, the most important of which is
4874  * the interrupt controller.
4875  */
4876 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4877                                           bool enable)
4878 {
4879         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4880
4881         if (enable)
4882                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4883         else
4884                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4885         WREG32(CP_INT_CNTL_RING0, tmp);
4886 }
4887
4888 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4889 {
4890         u32 tmp;
4891
4892         tmp = RREG32(RLC_LB_CNTL);
4893         if (enable)
4894                 tmp |= LOAD_BALANCE_ENABLE;
4895         else
4896                 tmp &= ~LOAD_BALANCE_ENABLE;
4897         WREG32(RLC_LB_CNTL, tmp);
4898 }
4899
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll the per-CU serdes master busy bit for every SE/SH pair,
	 * giving each up to usec_timeout microseconds to go idle */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast SE/SH selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU serdes masters (SE/GC/TC0/TC1) */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
	/* NOTE(review): timeouts are silent here — callers get no error */
}
4924
4925 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4926 {
4927         u32 tmp;
4928
4929         tmp = RREG32(RLC_CNTL);
4930         if (tmp != rlc)
4931                 WREG32(RLC_CNTL, rlc);
4932 }
4933
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	/* only halt if the RLC is actually running */
	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the GPM microengine to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	/* return the prior RLC_CNTL so the caller can restore it */
	return orig;
}
4957
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* request safe mode from the RLC */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until both power and clock status report ready */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to acknowledge by clearing the REQ bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
4978
4979 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4980 {
4981         u32 tmp;
4982
4983         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4984         WREG32(RLC_GPR_REG2, tmp);
4985 }
4986
4987 /**
4988  * cik_rlc_stop - stop the RLC ME
4989  *
4990  * @rdev: radeon_device pointer
4991  *
4992  * Halt the RLC ME (MicroEngine) (CIK).
4993  */
4994 static void cik_rlc_stop(struct radeon_device *rdev)
4995 {
4996         WREG32(RLC_CNTL, 0);
4997
4998         cik_enable_gui_idle_interrupt(rdev, false);
4999
5000         cik_wait_for_rlc_serdes(rdev);
5001 }
5002
5003 /**
5004  * cik_rlc_start - start the RLC ME
5005  *
5006  * @rdev: radeon_device pointer
5007  *
5008  * Unhalt the RLC ME (MicroEngine) (CIK).
5009  */
5010 static void cik_rlc_start(struct radeon_device *rdev)
5011 {
5012         WREG32(RLC_CNTL, RLC_ENABLE);
5013
5014         cik_enable_gui_idle_interrupt(rdev, true);
5015
5016         udelay(50);
5017 }
5018
5019 /**
5020  * cik_rlc_resume - setup the RLC hw
5021  *
5022  * @rdev: radeon_device pointer
5023  *
5024  * Initialize the RLC registers, load the ucode,
5025  * and start the RLC (CIK).
5026  * Returns 0 for success, -EINVAL if the ucode is not available.
5027  */
5028 static int cik_rlc_resume(struct radeon_device *rdev)
5029 {
5030         u32 i, size, tmp;
5031         const __be32 *fw_data;
5032
5033         if (!rdev->rlc_fw)
5034                 return -EINVAL;
5035
5036         switch (rdev->family) {
5037         case CHIP_BONAIRE:
5038         default:
5039                 size = BONAIRE_RLC_UCODE_SIZE;
5040                 break;
5041         case CHIP_KAVERI:
5042                 size = KV_RLC_UCODE_SIZE;
5043                 break;
5044         case CHIP_KABINI:
5045                 size = KB_RLC_UCODE_SIZE;
5046                 break;
5047         }
5048
5049         cik_rlc_stop(rdev);
5050
5051         /* disable CG */
5052         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5053         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5054
5055         si_rlc_reset(rdev);
5056
5057         cik_init_pg(rdev);
5058
5059         cik_init_cg(rdev);
5060
5061         WREG32(RLC_LB_CNTR_INIT, 0);
5062         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5063
5064         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5065         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5066         WREG32(RLC_LB_PARAMS, 0x00600408);
5067         WREG32(RLC_LB_CNTL, 0x80000004);
5068
5069         WREG32(RLC_MC_CNTL, 0);
5070         WREG32(RLC_UCODE_CNTL, 0);
5071
5072         fw_data = (const __be32 *)rdev->rlc_fw->data;
5073                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5074         for (i = 0; i < size; i++)
5075                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5076         WREG32(RLC_GPM_UCODE_ADDR, 0);
5077
5078         /* XXX - find out what chips support lbpw */
5079         cik_enable_lbpw(rdev, false);
5080
5081         if (rdev->family == CHIP_BONAIRE)
5082                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5083
5084         cik_rlc_start(rdev);
5085
5086         return 0;
5087 }
5088
/* Enable/disable GFX coarse grain clock gating (CGCG) and clock
 * gating light sleep (CGLS) via RLC_CGCG_CGLS_CTRL and the RLC
 * serdes write interface.  The RLC is halted around the serdes
 * reprogramming and restored afterwards. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                /* halt the RLC while the serdes masks are reprogrammed */
                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes write to all SEs/SHs/CUs */
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);

                /* restore the saved RLC_CNTL value */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* NOTE(review): four back-to-back reads with the result
                 * discarded — presumably a posted-write flush / settle
                 * delay; confirm before removing. */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        /* only write the control register if the bits changed */
        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5124
/* Enable/disable GFX medium grain clock gating (MGCG) along with the
 * dependent CP/RLC memory light sleep and CGTS shader gating state.
 * The RLC is halted around the serdes reprogramming in both paths. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* enable CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* clear the MGCG override bit (bit 1) */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* halt the RLC while the serdes masks are reprogrammed */
                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes write to all SEs/SHs/CUs */
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);

                /* restore the saved RLC_CNTL value */
                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        /* configure CGTS shader clock gating */
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* set the MGCG override bit */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000002;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* disable RLC memory light sleep */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                /* disable CP memory light sleep */
                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                /* force the CGTS overrides on */
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                /* halt the RLC while the serdes masks are reprogrammed */
                tmp = cik_halt_rlc(rdev);

                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);

                /* restore the saved RLC_CNTL value */
                cik_update_rlc(rdev, tmp);
        }
}
5203
/* Memory-controller registers whose MC_LS_ENABLE / MC_CG_ENABLE bits
 * are toggled by cik_enable_mc_ls() and cik_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
5216
5217 static void cik_enable_mc_ls(struct radeon_device *rdev,
5218                              bool enable)
5219 {
5220         int i;
5221         u32 orig, data;
5222
5223         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5224                 orig = data = RREG32(mc_cg_registers[i]);
5225                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5226                         data |= MC_LS_ENABLE;
5227                 else
5228                         data &= ~MC_LS_ENABLE;
5229                 if (data != orig)
5230                         WREG32(mc_cg_registers[i], data);
5231         }
5232 }
5233
5234 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5235                                bool enable)
5236 {
5237         int i;
5238         u32 orig, data;
5239
5240         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5241                 orig = data = RREG32(mc_cg_registers[i]);
5242                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5243                         data |= MC_CG_ENABLE;
5244                 else
5245                         data &= ~MC_CG_ENABLE;
5246                 if (data != orig)
5247                         WREG32(mc_cg_registers[i], data);
5248         }
5249 }
5250
5251 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5252                                  bool enable)
5253 {
5254         u32 orig, data;
5255
5256         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5257                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5258                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5259         } else {
5260                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5261                 data |= 0xff000000;
5262                 if (data != orig)
5263                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5264
5265                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5266                 data |= 0xff000000;
5267                 if (data != orig)
5268                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5269         }
5270 }
5271
5272 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5273                                  bool enable)
5274 {
5275         u32 orig, data;
5276
5277         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5278                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5279                 data |= 0x100;
5280                 if (orig != data)
5281                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5282
5283                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5284                 data |= 0x100;
5285                 if (orig != data)
5286                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5287         } else {
5288                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5289                 data &= ~0x100;
5290                 if (orig != data)
5291                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5292
5293                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5294                 data &= ~0x100;
5295                 if (orig != data)
5296                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5297         }
5298 }
5299
/* Enable/disable UVD medium grain clock gating via the indirect
 * UVD context registers and the DCM bit in UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                /* NOTE(review): the value read here is immediately
                 * overwritten with 0xfff; the read may only matter for
                 * its side effect on the indirect register interface —
                 * confirm before simplifying. */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                /* clear the low 12 bits and the DCM enable */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
5325
5326 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5327                                bool enable)
5328 {
5329         u32 orig, data;
5330
5331         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5332
5333         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5334                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5335                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5336         else
5337                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5338                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5339
5340         if (orig != data)
5341                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5342 }
5343
5344 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5345                                 bool enable)
5346 {
5347         u32 orig, data;
5348
5349         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5350
5351         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5352                 data &= ~CLOCK_GATING_DIS;
5353         else
5354                 data |= CLOCK_GATING_DIS;
5355
5356         if (orig != data)
5357                 WREG32(HDP_HOST_PATH_CNTL, data);
5358 }
5359
5360 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5361                               bool enable)
5362 {
5363         u32 orig, data;
5364
5365         orig = data = RREG32(HDP_MEM_POWER_LS);
5366
5367         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5368                 data |= HDP_LS_ENABLE;
5369         else
5370                 data &= ~HDP_LS_ENABLE;
5371
5372         if (orig != data)
5373                 WREG32(HDP_MEM_POWER_LS, data);
5374 }
5375
5376 void cik_update_cg(struct radeon_device *rdev,
5377                    u32 block, bool enable)
5378 {
5379         if (block & RADEON_CG_BLOCK_GFX) {
5380                 /* order matters! */
5381                 if (enable) {
5382                         cik_enable_mgcg(rdev, true);
5383                         cik_enable_cgcg(rdev, true);
5384                 } else {
5385                         cik_enable_cgcg(rdev, false);
5386                         cik_enable_mgcg(rdev, false);
5387                 }
5388         }
5389
5390         if (block & RADEON_CG_BLOCK_MC) {
5391                 if (!(rdev->flags & RADEON_IS_IGP)) {
5392                         cik_enable_mc_mgcg(rdev, enable);
5393                         cik_enable_mc_ls(rdev, enable);
5394                 }
5395         }
5396
5397         if (block & RADEON_CG_BLOCK_SDMA) {
5398                 cik_enable_sdma_mgcg(rdev, enable);
5399                 cik_enable_sdma_mgls(rdev, enable);
5400         }
5401
5402         if (block & RADEON_CG_BLOCK_BIF) {
5403                 cik_enable_bif_mgls(rdev, enable);
5404         }
5405
5406         if (block & RADEON_CG_BLOCK_UVD) {
5407                 if (rdev->has_uvd)
5408                         cik_enable_uvd_mgcg(rdev, enable);
5409         }
5410
5411         if (block & RADEON_CG_BLOCK_HDP) {
5412                 cik_enable_hdp_mgcg(rdev, enable);
5413                 cik_enable_hdp_ls(rdev, enable);
5414         }
5415 }
5416
5417 static void cik_init_cg(struct radeon_device *rdev)
5418 {
5419
5420         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5421
5422         if (rdev->has_uvd)
5423                 si_init_uvd_internal_cg(rdev);
5424
5425         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5426                              RADEON_CG_BLOCK_SDMA |
5427                              RADEON_CG_BLOCK_BIF |
5428                              RADEON_CG_BLOCK_UVD |
5429                              RADEON_CG_BLOCK_HDP), true);
5430 }
5431
5432 static void cik_fini_cg(struct radeon_device *rdev)
5433 {
5434         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5435                              RADEON_CG_BLOCK_SDMA |
5436                              RADEON_CG_BLOCK_BIF |
5437                              RADEON_CG_BLOCK_UVD |
5438                              RADEON_CG_BLOCK_HDP), false);
5439
5440         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5441 }
5442
5443 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5444                                           bool enable)
5445 {
5446         u32 data, orig;
5447
5448         orig = data = RREG32(RLC_PG_CNTL);
5449         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5450                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5451         else
5452                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5453         if (orig != data)
5454                 WREG32(RLC_PG_CNTL, data);
5455 }
5456
5457 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5458                                           bool enable)
5459 {
5460         u32 data, orig;
5461
5462         orig = data = RREG32(RLC_PG_CNTL);
5463         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5464                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5465         else
5466                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5467         if (orig != data)
5468                 WREG32(RLC_PG_CNTL, data);
5469 }
5470
5471 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5472 {
5473         u32 data, orig;
5474
5475         orig = data = RREG32(RLC_PG_CNTL);
5476         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5477                 data &= ~DISABLE_CP_PG;
5478         else
5479                 data |= DISABLE_CP_PG;
5480         if (orig != data)
5481                 WREG32(RLC_PG_CNTL, data);
5482 }
5483
5484 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5485 {
5486         u32 data, orig;
5487
5488         orig = data = RREG32(RLC_PG_CNTL);
5489         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5490                 data &= ~DISABLE_GDS_PG;
5491         else
5492                 data |= DISABLE_GDS_PG;
5493         if (orig != data)
5494                 WREG32(RLC_PG_CNTL, data);
5495 }
5496
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP powergating tables embedded in the CE/PFP/ME/MEC
 * microcode images into the RLC cp_table buffer, one
 * CP_ME_TABLE_SIZE-dword chunk per micro engine (4 engines, 5 on
 * Kaveri).  No-op if the cp_table buffer is not mapped. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset;

        /* Kaveri carries a fifth table (see the mec_fw branch below) */
        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                /* select the firmware image and the table offset inside it;
                 * engines 3+ read from the MEC firmware */
                if (me == 0) {
                        fw_data = (const __be32 *)rdev->ce_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else if (me == 1) {
                        fw_data = (const __be32 *)rdev->pfp_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else if (me == 2) {
                        fw_data = (const __be32 *)rdev->me_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else {
                        fw_data = (const __be32 *)rdev->mec_fw->data;
                        table_offset = CP_MEC_TABLE_OFFSET;
                }

                /* firmware words are big-endian; convert while copying */
                for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
                        dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
                }
                bo_offset += CP_ME_TABLE_SIZE;
        }
}
5538
/* Enable/disable GFX coarse grain powergating: set/clear GFX_PG_ENABLE
 * in RLC_PG_CNTL together with the automatic powergating enable in
 * RLC_AUTO_PG_CTRL. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): read result is discarded — presumably
                 * this access kicks the GFX block out of powergating
                 * after the disable; confirm before removing. */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
5568
5569 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5570 {
5571         u32 mask = 0, tmp, tmp1;
5572         int i;
5573
5574         cik_select_se_sh(rdev, se, sh);
5575         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5576         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5577         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5578
5579         tmp &= 0xffff0000;
5580
5581         tmp |= tmp1;
5582         tmp >>= 16;
5583
5584         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5585                 mask <<= 1;
5586                 mask |= 1;
5587         }
5588
5589         return (~tmp) & mask;
5590 }
5591
5592 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5593 {
5594         u32 i, j, k, active_cu_number = 0;
5595         u32 mask, counter, cu_bitmap;
5596         u32 tmp = 0;
5597
5598         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5599                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5600                         mask = 1;
5601                         cu_bitmap = 0;
5602                         counter = 0;
5603                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5604                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5605                                         if (counter < 2)
5606                                                 cu_bitmap |= mask;
5607                                         counter ++;
5608                                 }
5609                                 mask <<= 1;
5610                         }
5611
5612                         active_cu_number += counter;
5613                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5614                 }
5615         }
5616
5617         WREG32(RLC_PG_AO_CU_MASK, tmp);
5618
5619         tmp = RREG32(RLC_MAX_PG_CU);
5620         tmp &= ~MAX_PU_CU_MASK;
5621         tmp |= MAX_PU_CU(active_cu_number);
5622         WREG32(RLC_MAX_PG_CU, tmp);
5623 }
5624
5625 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5626                                        bool enable)
5627 {
5628         u32 data, orig;
5629
5630         orig = data = RREG32(RLC_PG_CNTL);
5631         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5632                 data |= STATIC_PER_CU_PG_ENABLE;
5633         else
5634                 data &= ~STATIC_PER_CU_PG_ENABLE;
5635         if (orig != data)
5636                 WREG32(RLC_PG_CNTL, data);
5637 }
5638
5639 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5640                                         bool enable)
5641 {
5642         u32 data, orig;
5643
5644         orig = data = RREG32(RLC_PG_CNTL);
5645         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5646                 data |= DYN_PER_CU_PG_ENABLE;
5647         else
5648                 data &= ~DYN_PER_CU_PG_ENABLE;
5649         if (orig != data)
5650                 WREG32(RLC_PG_CNTL, data);
5651 }
5652
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch area and registers needed for GFX
 * powergating: the clear-state descriptor (hi/lo GPU address + size),
 * the save/restore register list, the save/restore and cp-table
 * buffer addresses, and the powergating delay/idle-poll parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                /* write the clear state descriptor into RLC scratch */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear state data: zero the three descriptor words */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                /* upload the save/restore register list */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        /* buffer addresses are programmed in 256-byte units (>> 8) */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        /* set the ring-buffer wptr idle poll interval */
        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        /* powergating delay parameters */
        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        /* GRBM register save/restore idle gap threshold */
        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
5704
5705 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5706 {
5707         cik_enable_gfx_cgpg(rdev, enable);
5708         cik_enable_gfx_static_mgpg(rdev, enable);
5709         cik_enable_gfx_dynamic_mgpg(rdev, enable);
5710 }
5711
5712 u32 cik_get_csb_size(struct radeon_device *rdev)
5713 {
5714         u32 count = 0;
5715         const struct cs_section_def *sect = NULL;
5716         const struct cs_extent_def *ext = NULL;
5717
5718         if (rdev->rlc.cs_data == NULL)
5719                 return 0;
5720
5721         /* begin clear state */
5722         count += 2;
5723         /* context control state */
5724         count += 3;
5725
5726         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5727                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5728                         if (sect->id == SECT_CONTEXT)
5729                                 count += 2 + ext->reg_count;
5730                         else
5731                                 return 0;
5732                 }
5733         }
5734         /* pa_sc_raster_config/pa_sc_raster_config1 */
5735         count += 4;
5736         /* end clear state */
5737         count += 2;
5738         /* clear state */
5739         count += 2;
5740
5741         return count;
5742 }
5743
/* Build the clear state indirect buffer in @buffer: preamble begin,
 * context control, one SET_CONTEXT_REG packet per cs_data extent,
 * the per-family pa_sc_raster_config values, preamble end, and a
 * CLEAR_STATE packet.  The layout must stay in sync with
 * cik_get_csb_size(). */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (rdev->rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
        buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

        buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
        buffer[count++] = 0x80000000;
        buffer[count++] = 0x80000000;

        /* emit each context-register extent as a SET_CONTEXT_REG packet */
        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
                                /* context regs are offset from 0xa000 */
                                buffer[count++] = ext->reg_index - 0xa000;
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = ext->extent[i];
                        } else {
                                /* only SECT_CONTEXT is supported; bail */
                                return;
                        }
                }
        }

        /* pa_sc_raster_config / pa_sc_raster_config_1, per family */
        buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
        buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
        switch (rdev->family) {
        case CHIP_BONAIRE:
                buffer[count++] = 0x16000012;
                buffer[count++] = 0x00000000;
                break;
        case CHIP_KAVERI:
                buffer[count++] = 0x00000000; /* XXX */
                buffer[count++] = 0x00000000;
                break;
        case CHIP_KABINI:
                buffer[count++] = 0x00000000; /* XXX */
                buffer[count++] = 0x00000000;
                break;
        default:
                buffer[count++] = 0x00000000;
                buffer[count++] = 0x00000000;
                break;
        }

        buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
        buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

        buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
        buffer[count++] = 0;
}
5802
5803 static void cik_init_pg(struct radeon_device *rdev)
5804 {
5805         if (rdev->pg_flags) {
5806                 cik_enable_sck_slowdown_on_pu(rdev, true);
5807                 cik_enable_sck_slowdown_on_pd(rdev, true);
5808                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5809                         cik_init_gfx_cgpg(rdev);
5810                         cik_enable_cp_pg(rdev, true);
5811                         cik_enable_gds_pg(rdev, true);
5812                 }
5813                 cik_init_ao_cu_mask(rdev);
5814                 cik_update_gfx_pg(rdev, true);
5815         }
5816 }
5817
5818 static void cik_fini_pg(struct radeon_device *rdev)
5819 {
5820         if (rdev->pg_flags) {
5821                 cik_update_gfx_pg(rdev, false);
5822                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5823                         cik_enable_cp_pg(rdev, false);
5824                         cik_enable_gds_pg(rdev, false);
5825                 }
5826         }
5827 }
5828
5829 /*
5830  * Interrupts
5831  * Starting with r6xx, interrupts are handled via a ring buffer.
5832  * Ring buffers are areas of GPU accessible memory that the GPU
5833  * writes interrupt vectors into and the host reads vectors out of.
5834  * There is a rptr (read pointer) that determines where the
5835  * host is currently reading, and a wptr (write pointer)
5836  * which determines where the GPU has written.  When the
5837  * pointers are equal, the ring is idle.  When the GPU
5838  * writes vectors to the ring buffer, it increments the
5839  * wptr.  When there is an interrupt, the host then starts
5840  * fetching commands and processing them until the pointers are
5841  * equal again at which point it updates the rptr.
5842  */
5843
5844 /**
5845  * cik_enable_interrupts - Enable the interrupt ring buffer
5846  *
5847  * @rdev: radeon_device pointer
5848  *
5849  * Enable the interrupt ring buffer (CIK).
5850  */
5851 static void cik_enable_interrupts(struct radeon_device *rdev)
5852 {
5853         u32 ih_cntl = RREG32(IH_CNTL);
5854         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5855
5856         ih_cntl |= ENABLE_INTR;
5857         ih_rb_cntl |= IH_RB_ENABLE;
5858         WREG32(IH_CNTL, ih_cntl);
5859         WREG32(IH_RB_CNTL, ih_rb_cntl);
5860         rdev->ih.enabled = true;
5861 }
5862
5863 /**
5864  * cik_disable_interrupts - Disable the interrupt ring buffer
5865  *
5866  * @rdev: radeon_device pointer
5867  *
5868  * Disable the interrupt ring buffer (CIK).
5869  */
5870 static void cik_disable_interrupts(struct radeon_device *rdev)
5871 {
5872         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5873         u32 ih_cntl = RREG32(IH_CNTL);
5874
5875         ih_rb_cntl &= ~IH_RB_ENABLE;
5876         ih_cntl &= ~ENABLE_INTR;
5877         WREG32(IH_RB_CNTL, ih_rb_cntl);
5878         WREG32(IH_CNTL, ih_cntl);
5879         /* set rptr, wptr to 0 */
5880         WREG32(IH_RB_RPTR, 0);
5881         WREG32(IH_RB_WPTR, 0);
5882         rdev->ih.enabled = false;
5883         rdev->ih.rptr = 0;
5884 }
5885
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK):
 * gfx/compute CP rings, both SDMA engines, GRBM, per-crtc
 * vblank/vline masks, and DAC/digital hotplug detection.
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: write only the context busy/empty enables, clearing
	 * every other interrupt enable bit in CP_INT_CNTL_RING0 */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma: clear the trap enable on both SDMA instances,
	 * preserving the rest of each SDMA0_CNTL register */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all 4 pipes on both compute micro engines */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. -- only crtcs the asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the configured polarity bit of each
	 * DC_HPDx_INT_CONTROL, clearing the enable/ack bits */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
5945
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up: release the IH ring we allocated */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* log2 of the ring size in dwords; presumably feeds the
	 * IH_RB_CNTL size field via the << 1 shift below */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6027
6028 /**
6029  * cik_irq_set - enable/disable interrupt sources
6030  *
6031  * @rdev: radeon_device pointer
6032  *
6033  * Enable interrupt sources on the GPU (vblanks, hpd,
6034  * etc.) (CIK).
6035  * Returns 0 for success, errors for failure.
6036  */
6037 int cik_irq_set(struct radeon_device *rdev)
6038 {
6039         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6040                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6041         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6042         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6043         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6044         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6045         u32 grbm_int_cntl = 0;
6046         u32 dma_cntl, dma_cntl1;
6047         u32 thermal_int;
6048
6049         if (!rdev->irq.installed) {
6050                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6051                 return -EINVAL;
6052         }
6053         /* don't enable anything if the ih is disabled */
6054         if (!rdev->ih.enabled) {
6055                 cik_disable_interrupts(rdev);
6056                 /* force the active interrupt state to all disabled */
6057                 cik_disable_interrupt_state(rdev);
6058                 return 0;
6059         }
6060
6061         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6062         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6063         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6064         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6065         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6066         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6067
6068         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6069         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6070
6071         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6072         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6073         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6074         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6075         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6076         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6077         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6078         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6079
6080         if (rdev->flags & RADEON_IS_IGP)
6081                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6082                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6083         else
6084                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6085                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6086
6087         /* enable CP interrupts on all rings */
6088         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6089                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6090                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6091         }
6092         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6093                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6094                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6095                 if (ring->me == 1) {
6096                         switch (ring->pipe) {
6097                         case 0:
6098                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6099                                 break;
6100                         case 1:
6101                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6102                                 break;
6103                         case 2:
6104                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6105                                 break;
6106                         case 3:
6107                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6108                                 break;
6109                         default:
6110                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6111                                 break;
6112                         }
6113                 } else if (ring->me == 2) {
6114                         switch (ring->pipe) {
6115                         case 0:
6116                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6117                                 break;
6118                         case 1:
6119                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6120                                 break;
6121                         case 2:
6122                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6123                                 break;
6124                         case 3:
6125                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6126                                 break;
6127                         default:
6128                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6129                                 break;
6130                         }
6131                 } else {
6132                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6133                 }
6134         }
6135         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6136                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6137                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6138                 if (ring->me == 1) {
6139                         switch (ring->pipe) {
6140                         case 0:
6141                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6142                                 break;
6143                         case 1:
6144                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6145                                 break;
6146                         case 2:
6147                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6148                                 break;
6149                         case 3:
6150                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6151                                 break;
6152                         default:
6153                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6154                                 break;
6155                         }
6156                 } else if (ring->me == 2) {
6157                         switch (ring->pipe) {
6158                         case 0:
6159                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6160                                 break;
6161                         case 1:
6162                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6163                                 break;
6164                         case 2:
6165                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6166                                 break;
6167                         case 3:
6168                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6169                                 break;
6170                         default:
6171                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6172                                 break;
6173                         }
6174                 } else {
6175                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6176                 }
6177         }
6178
6179         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6180                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6181                 dma_cntl |= TRAP_ENABLE;
6182         }
6183
6184         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6185                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6186                 dma_cntl1 |= TRAP_ENABLE;
6187         }
6188
6189         if (rdev->irq.crtc_vblank_int[0] ||
6190             atomic_read(&rdev->irq.pflip[0])) {
6191                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6192                 crtc1 |= VBLANK_INTERRUPT_MASK;
6193         }
6194         if (rdev->irq.crtc_vblank_int[1] ||
6195             atomic_read(&rdev->irq.pflip[1])) {
6196                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6197                 crtc2 |= VBLANK_INTERRUPT_MASK;
6198         }
6199         if (rdev->irq.crtc_vblank_int[2] ||
6200             atomic_read(&rdev->irq.pflip[2])) {
6201                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6202                 crtc3 |= VBLANK_INTERRUPT_MASK;
6203         }
6204         if (rdev->irq.crtc_vblank_int[3] ||
6205             atomic_read(&rdev->irq.pflip[3])) {
6206                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6207                 crtc4 |= VBLANK_INTERRUPT_MASK;
6208         }
6209         if (rdev->irq.crtc_vblank_int[4] ||
6210             atomic_read(&rdev->irq.pflip[4])) {
6211                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6212                 crtc5 |= VBLANK_INTERRUPT_MASK;
6213         }
6214         if (rdev->irq.crtc_vblank_int[5] ||
6215             atomic_read(&rdev->irq.pflip[5])) {
6216                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6217                 crtc6 |= VBLANK_INTERRUPT_MASK;
6218         }
6219         if (rdev->irq.hpd[0]) {
6220                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6221                 hpd1 |= DC_HPDx_INT_EN;
6222         }
6223         if (rdev->irq.hpd[1]) {
6224                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6225                 hpd2 |= DC_HPDx_INT_EN;
6226         }
6227         if (rdev->irq.hpd[2]) {
6228                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6229                 hpd3 |= DC_HPDx_INT_EN;
6230         }
6231         if (rdev->irq.hpd[3]) {
6232                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6233                 hpd4 |= DC_HPDx_INT_EN;
6234         }
6235         if (rdev->irq.hpd[4]) {
6236                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6237                 hpd5 |= DC_HPDx_INT_EN;
6238         }
6239         if (rdev->irq.hpd[5]) {
6240                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6241                 hpd6 |= DC_HPDx_INT_EN;
6242         }
6243
6244         if (rdev->irq.dpm_thermal) {
6245                 DRM_DEBUG("dpm thermal\n");
6246                 if (rdev->flags & RADEON_IS_IGP)
6247                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6248                 else
6249                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6250         }
6251
6252         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6253
6254         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6255         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6256
6257         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6258         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6259         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6260         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6261         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6262         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6263         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6264         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6265
6266         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6267
6268         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6269         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6270         if (rdev->num_crtc >= 4) {
6271                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6272                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6273         }
6274         if (rdev->num_crtc >= 6) {
6275                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6276                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6277         }
6278
6279         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6280         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6281         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6282         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6283         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6284         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6285
6286         if (rdev->flags & RADEON_IS_IGP)
6287                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6288         else
6289                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6290
6291         return 0;
6292 }
6293
6294 /**
6295  * cik_irq_ack - ack interrupt sources
6296  *
6297  * @rdev: radeon_device pointer
6298  *
6299  * Ack interrupt sources on the GPU (vblanks, hpd,
6300  * etc.) (CIK).  Certain interrupts sources are sw
6301  * generated and do not require an explicit ack.
6302  */
6303 static inline void cik_irq_ack(struct radeon_device *rdev)
6304 {
6305         u32 tmp;
6306
6307         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6308         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6309         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6310         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6311         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6312         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6313         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6314
6315         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6316                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6317         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6318                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6319         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6320                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6321         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6322                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6323
6324         if (rdev->num_crtc >= 4) {
6325                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6326                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6327                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6328                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6329                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6330                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6331                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6332                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6333         }
6334
6335         if (rdev->num_crtc >= 6) {
6336                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6337                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6338                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6339                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6340                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6341                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6342                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6343                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6344         }
6345
6346         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6347                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6348                 tmp |= DC_HPDx_INT_ACK;
6349                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6350         }
6351         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6352                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6353                 tmp |= DC_HPDx_INT_ACK;
6354                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6355         }
6356         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6357                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6358                 tmp |= DC_HPDx_INT_ACK;
6359                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6360         }
6361         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6362                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6363                 tmp |= DC_HPDx_INT_ACK;
6364                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6365         }
6366         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6367                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6368                 tmp |= DC_HPDx_INT_ACK;
6369                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6370         }
6371         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6372                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6373                 tmp |= DC_HPDx_INT_ACK;
6374                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6375         }
6376 }
6377
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable the IH ring buffer, ack any pending display
 * interrupts after a short settle delay, then clear all
 * interrupt source enables (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
6393
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
6407
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* suspend path also stops the RLC before the ring is freed */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6422
6423 /**
6424  * cik_get_ih_wptr - get the IH ring buffer wptr
6425  *
6426  * @rdev: radeon_device pointer
6427  *
6428  * Get the IH ring buffer wptr from either the register
6429  * or the writeback memory buffer (CIK).  Also check for
6430  * ring buffer overflow and deal with it.
6431  * Used by cik_irq_process().
6432  * Returns the value of the wptr.
6433  */
6434 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6435 {
6436         u32 wptr, tmp;
6437
6438         if (rdev->wb.enabled)
6439                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6440         else
6441                 wptr = RREG32(IH_RB_WPTR);
6442
6443         if (wptr & RB_OVERFLOW) {
6444                 /* When a ring buffer overflow happen start parsing interrupt
6445                  * from the last not overwritten vector (wptr + 16). Hopefully
6446                  * this should allow us to catchup.
6447                  */
6448                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6449                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6450                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6451                 tmp = RREG32(IH_RB_CNTL);
6452                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6453                 WREG32(IH_RB_CNTL, tmp);
6454         }
6455         return (wptr & rdev->ih.ptr_mask);
6456 }
6457
6458 /*        CIK IV Ring
6459  * Each IV ring entry is 128 bits:
6460  * [7:0]    - interrupt source id
6461  * [31:8]   - reserved
6462  * [59:32]  - interrupt source data
6463  * [63:60]  - reserved
6464  * [71:64]  - RINGID
6465  *            CP:
6466  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6467  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6468  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6469  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6470  *            PIPE_ID - ME0 0=3D
6471  *                    - ME1&2 compute dispatcher (4 pipes each)
6472  *            SDMA:
6473  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6474  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6475  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6476  * [79:72]  - VMID
6477  * [95:80]  - PASID
6478  * [127:96] - reserved
6479  */
6480 /**
6481  * cik_irq_process - interrupt handler
6482  *
6483  * @rdev: radeon_device pointer
6484  *
 * Interrupt handler (CIK).  Walk the IH ring,
6486  * ack interrupts and schedule work to handle
6487  * interrupt events.
6488  * Returns irq process return code.
6489  */
6490 int cik_irq_process(struct radeon_device *rdev)
6491 {
6492         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6493         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6494         u32 wptr;
6495         u32 rptr;
6496         u32 src_id, src_data, ring_id;
6497         u8 me_id, pipe_id, queue_id;
6498         u32 ring_index;
6499         bool queue_hotplug = false;
6500         bool queue_reset = false;
6501         u32 addr, status, mc_client;
6502         bool queue_thermal = false;
6503
6504         if (!rdev->ih.enabled || rdev->shutdown)
6505                 return IRQ_NONE;
6506
6507         wptr = cik_get_ih_wptr(rdev);
6508
6509 restart_ih:
6510         /* is somebody else already processing irqs? */
6511         if (atomic_xchg(&rdev->ih.lock, 1))
6512                 return IRQ_NONE;
6513
6514         rptr = rdev->ih.rptr;
6515         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6516
6517         /* Order reading of wptr vs. reading of IH ring data */
6518         rmb();
6519
6520         /* display interrupts */
6521         cik_irq_ack(rdev);
6522
6523         while (rptr != wptr) {
6524                 /* wptr/rptr are in bytes! */
6525                 ring_index = rptr / 4;
6526                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6527                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6528                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6529
6530                 switch (src_id) {
6531                 case 1: /* D1 vblank/vline */
6532                         switch (src_data) {
6533                         case 0: /* D1 vblank */
6534                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6535                                         if (rdev->irq.crtc_vblank_int[0]) {
6536                                                 drm_handle_vblank(rdev->ddev, 0);
6537                                                 rdev->pm.vblank_sync = true;
6538                                                 wake_up(&rdev->irq.vblank_queue);
6539                                         }
6540                                         if (atomic_read(&rdev->irq.pflip[0]))
6541                                                 radeon_crtc_handle_flip(rdev, 0);
6542                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6543                                         DRM_DEBUG("IH: D1 vblank\n");
6544                                 }
6545                                 break;
6546                         case 1: /* D1 vline */
6547                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6548                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6549                                         DRM_DEBUG("IH: D1 vline\n");
6550                                 }
6551                                 break;
6552                         default:
6553                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6554                                 break;
6555                         }
6556                         break;
6557                 case 2: /* D2 vblank/vline */
6558                         switch (src_data) {
6559                         case 0: /* D2 vblank */
6560                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6561                                         if (rdev->irq.crtc_vblank_int[1]) {
6562                                                 drm_handle_vblank(rdev->ddev, 1);
6563                                                 rdev->pm.vblank_sync = true;
6564                                                 wake_up(&rdev->irq.vblank_queue);
6565                                         }
6566                                         if (atomic_read(&rdev->irq.pflip[1]))
6567                                                 radeon_crtc_handle_flip(rdev, 1);
6568                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6569                                         DRM_DEBUG("IH: D2 vblank\n");
6570                                 }
6571                                 break;
6572                         case 1: /* D2 vline */
6573                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6574                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6575                                         DRM_DEBUG("IH: D2 vline\n");
6576                                 }
6577                                 break;
6578                         default:
6579                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6580                                 break;
6581                         }
6582                         break;
6583                 case 3: /* D3 vblank/vline */
6584                         switch (src_data) {
6585                         case 0: /* D3 vblank */
6586                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6587                                         if (rdev->irq.crtc_vblank_int[2]) {
6588                                                 drm_handle_vblank(rdev->ddev, 2);
6589                                                 rdev->pm.vblank_sync = true;
6590                                                 wake_up(&rdev->irq.vblank_queue);
6591                                         }
6592                                         if (atomic_read(&rdev->irq.pflip[2]))
6593                                                 radeon_crtc_handle_flip(rdev, 2);
6594                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6595                                         DRM_DEBUG("IH: D3 vblank\n");
6596                                 }
6597                                 break;
6598                         case 1: /* D3 vline */
6599                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6600                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6601                                         DRM_DEBUG("IH: D3 vline\n");
6602                                 }
6603                                 break;
6604                         default:
6605                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6606                                 break;
6607                         }
6608                         break;
6609                 case 4: /* D4 vblank/vline */
6610                         switch (src_data) {
6611                         case 0: /* D4 vblank */
6612                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6613                                         if (rdev->irq.crtc_vblank_int[3]) {
6614                                                 drm_handle_vblank(rdev->ddev, 3);
6615                                                 rdev->pm.vblank_sync = true;
6616                                                 wake_up(&rdev->irq.vblank_queue);
6617                                         }
6618                                         if (atomic_read(&rdev->irq.pflip[3]))
6619                                                 radeon_crtc_handle_flip(rdev, 3);
6620                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6621                                         DRM_DEBUG("IH: D4 vblank\n");
6622                                 }
6623                                 break;
6624                         case 1: /* D4 vline */
6625                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6626                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6627                                         DRM_DEBUG("IH: D4 vline\n");
6628                                 }
6629                                 break;
6630                         default:
6631                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6632                                 break;
6633                         }
6634                         break;
6635                 case 5: /* D5 vblank/vline */
6636                         switch (src_data) {
6637                         case 0: /* D5 vblank */
6638                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6639                                         if (rdev->irq.crtc_vblank_int[4]) {
6640                                                 drm_handle_vblank(rdev->ddev, 4);
6641                                                 rdev->pm.vblank_sync = true;
6642                                                 wake_up(&rdev->irq.vblank_queue);
6643                                         }
6644                                         if (atomic_read(&rdev->irq.pflip[4]))
6645                                                 radeon_crtc_handle_flip(rdev, 4);
6646                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6647                                         DRM_DEBUG("IH: D5 vblank\n");
6648                                 }
6649                                 break;
6650                         case 1: /* D5 vline */
6651                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6652                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6653                                         DRM_DEBUG("IH: D5 vline\n");
6654                                 }
6655                                 break;
6656                         default:
6657                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6658                                 break;
6659                         }
6660                         break;
6661                 case 6: /* D6 vblank/vline */
6662                         switch (src_data) {
6663                         case 0: /* D6 vblank */
6664                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6665                                         if (rdev->irq.crtc_vblank_int[5]) {
6666                                                 drm_handle_vblank(rdev->ddev, 5);
6667                                                 rdev->pm.vblank_sync = true;
6668                                                 wake_up(&rdev->irq.vblank_queue);
6669                                         }
6670                                         if (atomic_read(&rdev->irq.pflip[5]))
6671                                                 radeon_crtc_handle_flip(rdev, 5);
6672                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6673                                         DRM_DEBUG("IH: D6 vblank\n");
6674                                 }
6675                                 break;
6676                         case 1: /* D6 vline */
6677                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6678                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6679                                         DRM_DEBUG("IH: D6 vline\n");
6680                                 }
6681                                 break;
6682                         default:
6683                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6684                                 break;
6685                         }
6686                         break;
6687                 case 42: /* HPD hotplug */
6688                         switch (src_data) {
6689                         case 0:
6690                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6691                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6692                                         queue_hotplug = true;
6693                                         DRM_DEBUG("IH: HPD1\n");
6694                                 }
6695                                 break;
6696                         case 1:
6697                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6698                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6699                                         queue_hotplug = true;
6700                                         DRM_DEBUG("IH: HPD2\n");
6701                                 }
6702                                 break;
6703                         case 2:
6704                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6705                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6706                                         queue_hotplug = true;
6707                                         DRM_DEBUG("IH: HPD3\n");
6708                                 }
6709                                 break;
6710                         case 3:
6711                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6712                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6713                                         queue_hotplug = true;
6714                                         DRM_DEBUG("IH: HPD4\n");
6715                                 }
6716                                 break;
6717                         case 4:
6718                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6719                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6720                                         queue_hotplug = true;
6721                                         DRM_DEBUG("IH: HPD5\n");
6722                                 }
6723                                 break;
6724                         case 5:
6725                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6726                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6727                                         queue_hotplug = true;
6728                                         DRM_DEBUG("IH: HPD6\n");
6729                                 }
6730                                 break;
6731                         default:
6732                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6733                                 break;
6734                         }
6735                         break;
6736                 case 124: /* UVD */
6737                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6738                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6739                         break;
6740                 case 146:
6741                 case 147:
6742                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6743                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6744                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6745                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6746                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6747                                 addr);
6748                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6749                                 status);
6750                         cik_vm_decode_fault(rdev, status, addr, mc_client);
6751                         /* reset addr and status */
6752                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6753                         break;
6754                 case 176: /* GFX RB CP_INT */
6755                 case 177: /* GFX IB CP_INT */
6756                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6757                         break;
6758                 case 181: /* CP EOP event */
6759                         DRM_DEBUG("IH: CP EOP\n");
6760                         /* XXX check the bitfield order! */
6761                         me_id = (ring_id & 0x60) >> 5;
6762                         pipe_id = (ring_id & 0x18) >> 3;
6763                         queue_id = (ring_id & 0x7) >> 0;
6764                         switch (me_id) {
6765                         case 0:
6766                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6767                                 break;
6768                         case 1:
6769                         case 2:
6770                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6771                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6772                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6773                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6774                                 break;
6775                         }
6776                         break;
6777                 case 184: /* CP Privileged reg access */
6778                         DRM_ERROR("Illegal register access in command stream\n");
6779                         /* XXX check the bitfield order! */
6780                         me_id = (ring_id & 0x60) >> 5;
6781                         pipe_id = (ring_id & 0x18) >> 3;
6782                         queue_id = (ring_id & 0x7) >> 0;
6783                         switch (me_id) {
6784                         case 0:
6785                                 /* This results in a full GPU reset, but all we need to do is soft
6786                                  * reset the CP for gfx
6787                                  */
6788                                 queue_reset = true;
6789                                 break;
6790                         case 1:
6791                                 /* XXX compute */
6792                                 queue_reset = true;
6793                                 break;
6794                         case 2:
6795                                 /* XXX compute */
6796                                 queue_reset = true;
6797                                 break;
6798                         }
6799                         break;
6800                 case 185: /* CP Privileged inst */
6801                         DRM_ERROR("Illegal instruction in command stream\n");
6802                         /* XXX check the bitfield order! */
6803                         me_id = (ring_id & 0x60) >> 5;
6804                         pipe_id = (ring_id & 0x18) >> 3;
6805                         queue_id = (ring_id & 0x7) >> 0;
6806                         switch (me_id) {
6807                         case 0:
6808                                 /* This results in a full GPU reset, but all we need to do is soft
6809                                  * reset the CP for gfx
6810                                  */
6811                                 queue_reset = true;
6812                                 break;
6813                         case 1:
6814                                 /* XXX compute */
6815                                 queue_reset = true;
6816                                 break;
6817                         case 2:
6818                                 /* XXX compute */
6819                                 queue_reset = true;
6820                                 break;
6821                         }
6822                         break;
6823                 case 224: /* SDMA trap event */
6824                         /* XXX check the bitfield order! */
6825                         me_id = (ring_id & 0x3) >> 0;
6826                         queue_id = (ring_id & 0xc) >> 2;
6827                         DRM_DEBUG("IH: SDMA trap\n");
6828                         switch (me_id) {
6829                         case 0:
6830                                 switch (queue_id) {
6831                                 case 0:
6832                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6833                                         break;
6834                                 case 1:
6835                                         /* XXX compute */
6836                                         break;
6837                                 case 2:
6838                                         /* XXX compute */
6839                                         break;
6840                                 }
6841                                 break;
6842                         case 1:
6843                                 switch (queue_id) {
6844                                 case 0:
6845                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6846                                         break;
6847                                 case 1:
6848                                         /* XXX compute */
6849                                         break;
6850                                 case 2:
6851                                         /* XXX compute */
6852                                         break;
6853                                 }
6854                                 break;
6855                         }
6856                         break;
6857                 case 230: /* thermal low to high */
6858                         DRM_DEBUG("IH: thermal low to high\n");
6859                         rdev->pm.dpm.thermal.high_to_low = false;
6860                         queue_thermal = true;
6861                         break;
6862                 case 231: /* thermal high to low */
6863                         DRM_DEBUG("IH: thermal high to low\n");
6864                         rdev->pm.dpm.thermal.high_to_low = true;
6865                         queue_thermal = true;
6866                         break;
6867                 case 233: /* GUI IDLE */
6868                         DRM_DEBUG("IH: GUI idle\n");
6869                         break;
6870                 case 241: /* SDMA Privileged inst */
6871                 case 247: /* SDMA Privileged inst */
6872                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
6873                         /* XXX check the bitfield order! */
6874                         me_id = (ring_id & 0x3) >> 0;
6875                         queue_id = (ring_id & 0xc) >> 2;
6876                         switch (me_id) {
6877                         case 0:
6878                                 switch (queue_id) {
6879                                 case 0:
6880                                         queue_reset = true;
6881                                         break;
6882                                 case 1:
6883                                         /* XXX compute */
6884                                         queue_reset = true;
6885                                         break;
6886                                 case 2:
6887                                         /* XXX compute */
6888                                         queue_reset = true;
6889                                         break;
6890                                 }
6891                                 break;
6892                         case 1:
6893                                 switch (queue_id) {
6894                                 case 0:
6895                                         queue_reset = true;
6896                                         break;
6897                                 case 1:
6898                                         /* XXX compute */
6899                                         queue_reset = true;
6900                                         break;
6901                                 case 2:
6902                                         /* XXX compute */
6903                                         queue_reset = true;
6904                                         break;
6905                                 }
6906                                 break;
6907                         }
6908                         break;
6909                 default:
6910                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6911                         break;
6912                 }
6913
6914                 /* wptr/rptr are in bytes! */
6915                 rptr += 16;
6916                 rptr &= rdev->ih.ptr_mask;
6917         }
6918         if (queue_hotplug)
6919                 schedule_work(&rdev->hotplug_work);
6920         if (queue_reset)
6921                 schedule_work(&rdev->reset_work);
6922         if (queue_thermal)
6923                 schedule_work(&rdev->pm.dpm.thermal.work);
6924         rdev->ih.rptr = rptr;
6925         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6926         atomic_set(&rdev->ih.lock, 0);
6927
6928         /* make sure wptr hasn't changed while processing */
6929         wptr = cik_get_ih_wptr(rdev);
6930         if (wptr != rptr)
6931                 goto restart_ih;
6932
6933         return IRQ_HANDLED;
6934 }
6935
6936 /*
6937  * startup/shutdown callbacks
6938  */
6939 /**
6940  * cik_startup - program the asic to a functional state
6941  *
6942  * @rdev: radeon_device pointer
6943  *
6944  * Programs the asic to a functional state (CIK).
6945  * Called by cik_init() and cik_resume().
6946  * Returns 0 for success, error for failure.
6947  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* Fetch the microcode images if any are missing.  Only dGPUs
	 * (non-IGP) have a separate MC firmware image, and only they
	 * need it loaded into the hardware here.
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	/* GART must be up before the rings/IBs that live behind it */
	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the save/restore register list matching the IGP family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence processing on every ring we intend to use:
	 * gfx, two compute (CP1/CP2), two SDMA engines, then UVD below
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD bring-up is best effort: on any failure the UVD ring size is
	 * zeroed so the ring-init code below skips it instead of failing
	 * the whole startup
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	/* both SDMA engines share the GFX_RB register layout; they differ
	 * only by their per-engine register offset
	 */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size is 0 if UVD bring-up failed above; skip it then */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7173
7174 /**
7175  * cik_resume - resume the asic to a functional state
7176  *
7177  * @rdev: radeon_device pointer
7178  *
7179  * Programs the asic to a functional state (CIK).
7180  * Called at resume.
7181  * Returns 0 for success, error for failure.
7182  */
7183 int cik_resume(struct radeon_device *rdev)
7184 {
7185         int r;
7186
7187         /* post card */
7188         atom_asic_init(rdev->mode_info.atom_context);
7189
7190         /* init golden registers */
7191         cik_init_golden_registers(rdev);
7192
7193         rdev->accel_working = true;
7194         r = cik_startup(rdev);
7195         if (r) {
7196                 DRM_ERROR("cik startup failed on resume\n");
7197                 rdev->accel_working = false;
7198                 return r;
7199         }
7200
7201         return r;
7202
7203 }
7204
7205 /**
7206  * cik_suspend - suspend the asic
7207  *
7208  * @rdev: radeon_device pointer
7209  *
7210  * Bring the chip into a state suitable for suspend (CIK).
7211  * Called at suspend.
7212  * Returns 0 for success.
7213  */
7214 int cik_suspend(struct radeon_device *rdev)
7215 {
7216         dce6_audio_fini(rdev);
7217         radeon_vm_manager_fini(rdev);
7218         cik_cp_enable(rdev, false);
7219         cik_sdma_enable(rdev, false);
7220         uvd_v1_0_fini(rdev);
7221         radeon_uvd_suspend(rdev);
7222         cik_fini_pg(rdev);
7223         cik_fini_cg(rdev);
7224         cik_irq_suspend(rdev);
7225         radeon_wb_disable(rdev);
7226         cik_pcie_gart_disable(rdev);
7227         return 0;
7228 }
7229
/* Plan is to move initialization into that function and use
 * helper functions so that radeon_device_init pretty much
 * does nothing more than call asic-specific functions. This
 * should also allow removing a bunch of callback functions
 * like vram_info.
 */
7236 /**
7237  * cik_init - asic specific driver and hw init
7238  *
7239  * @rdev: radeon_device pointer
7240  *
7241  * Setup asic specific driver variables and program the hw
7242  * to a functional state (CIK).
7243  * Called at driver startup.
7244  * Returns 0 for success, errors for failure.
7245  */
7246 int cik_init(struct radeon_device *rdev)
7247 {
7248         struct radeon_ring *ring;
7249         int r;
7250
7251         /* Read BIOS */
7252         if (!radeon_get_bios(rdev)) {
7253                 if (ASIC_IS_AVIVO(rdev))
7254                         return -EINVAL;
7255         }
7256         /* Must be an ATOMBIOS */
7257         if (!rdev->is_atom_bios) {
7258                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7259                 return -EINVAL;
7260         }
7261         r = radeon_atombios_init(rdev);
7262         if (r)
7263                 return r;
7264
7265         /* Post card if necessary */
7266         if (!radeon_card_posted(rdev)) {
7267                 if (!rdev->bios) {
7268                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7269                         return -EINVAL;
7270                 }
7271                 DRM_INFO("GPU not posted. posting now...\n");
7272                 atom_asic_init(rdev->mode_info.atom_context);
7273         }
7274         /* init golden registers */
7275         cik_init_golden_registers(rdev);
7276         /* Initialize scratch registers */
7277         cik_scratch_init(rdev);
7278         /* Initialize surface registers */
7279         radeon_surface_init(rdev);
7280         /* Initialize clocks */
7281         radeon_get_clock_info(rdev->ddev);
7282
7283         /* Fence driver */
7284         r = radeon_fence_driver_init(rdev);
7285         if (r)
7286                 return r;
7287
7288         /* initialize memory controller */
7289         r = cik_mc_init(rdev);
7290         if (r)
7291                 return r;
7292         /* Memory manager */
7293         r = radeon_bo_init(rdev);
7294         if (r)
7295                 return r;
7296
7297         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7298         ring->ring_obj = NULL;
7299         r600_ring_init(rdev, ring, 1024 * 1024);
7300
7301         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7302         ring->ring_obj = NULL;
7303         r600_ring_init(rdev, ring, 1024 * 1024);
7304         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7305         if (r)
7306                 return r;
7307
7308         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7309         ring->ring_obj = NULL;
7310         r600_ring_init(rdev, ring, 1024 * 1024);
7311         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7312         if (r)
7313                 return r;
7314
7315         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7316         ring->ring_obj = NULL;
7317         r600_ring_init(rdev, ring, 256 * 1024);
7318
7319         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7320         ring->ring_obj = NULL;
7321         r600_ring_init(rdev, ring, 256 * 1024);
7322
7323         r = radeon_uvd_init(rdev);
7324         if (!r) {
7325                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7326                 ring->ring_obj = NULL;
7327                 r600_ring_init(rdev, ring, 4096);
7328         }
7329
7330         rdev->ih.ring_obj = NULL;
7331         r600_ih_ring_init(rdev, 64 * 1024);
7332
7333         r = r600_pcie_gart_init(rdev);
7334         if (r)
7335                 return r;
7336
7337         rdev->accel_working = true;
7338         r = cik_startup(rdev);
7339         if (r) {
7340                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7341                 cik_cp_fini(rdev);
7342                 cik_sdma_fini(rdev);
7343                 cik_irq_fini(rdev);
7344                 sumo_rlc_fini(rdev);
7345                 cik_mec_fini(rdev);
7346                 radeon_wb_fini(rdev);
7347                 radeon_ib_pool_fini(rdev);
7348                 radeon_vm_manager_fini(rdev);
7349                 radeon_irq_kms_fini(rdev);
7350                 cik_pcie_gart_fini(rdev);
7351                 rdev->accel_working = false;
7352         }
7353
7354         /* Don't start up if the MC ucode is missing.
7355          * The default clocks and voltages before the MC ucode
7356          * is loaded are not suffient for advanced operations.
7357          */
7358         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7359                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7360                 return -EINVAL;
7361         }
7362
7363         return 0;
7364 }
7365
7366 /**
7367  * cik_fini - asic specific driver and hw fini
7368  *
7369  * @rdev: radeon_device pointer
7370  *
7371  * Tear down the asic specific driver variables and program the hw
7372  * to an idle state (CIK).
7373  * Called at driver unload.
7374  */
7375 void cik_fini(struct radeon_device *rdev)
7376 {
7377         cik_cp_fini(rdev);
7378         cik_sdma_fini(rdev);
7379         cik_fini_pg(rdev);
7380         cik_fini_cg(rdev);
7381         cik_irq_fini(rdev);
7382         sumo_rlc_fini(rdev);
7383         cik_mec_fini(rdev);
7384         radeon_wb_fini(rdev);
7385         radeon_vm_manager_fini(rdev);
7386         radeon_ib_pool_fini(rdev);
7387         radeon_irq_kms_fini(rdev);
7388         uvd_v1_0_fini(rdev);
7389         radeon_uvd_fini(rdev);
7390         cik_pcie_gart_fini(rdev);
7391         r600_vram_scratch_fini(rdev);
7392         radeon_gem_fini(rdev);
7393         radeon_fence_driver_fini(rdev);
7394         radeon_bo_fini(rdev);
7395         radeon_atombios_fini(rdev);
7396         kfree(rdev->bios);
7397         rdev->bios = NULL;
7398 }
7399
7400 /* display watermark setup */
7401 /**
7402  * dce8_line_buffer_adjust - Set up the line buffer
7403  *
7404  * @rdev: radeon_device pointer
7405  * @radeon_crtc: the selected display controller
7406  * @mode: the current display mode on the selected display
7407  * controller
7408  *
7409  * Setup up the line buffer allocation for
7410  * the selected display controller (CIK).
7411  * Returns the line buffer size in pixels.
7412  */
7413 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7414                                    struct radeon_crtc *radeon_crtc,
7415                                    struct drm_display_mode *mode)
7416 {
7417         u32 tmp, buffer_alloc, i;
7418         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7419         /*
7420          * Line Buffer Setup
7421          * There are 6 line buffers, one for each display controllers.
7422          * There are 3 partitions per LB. Select the number of partitions
7423          * to enable based on the display width.  For display widths larger
7424          * than 4096, you need use to use 2 display controllers and combine
7425          * them using the stereo blender.
7426          */
7427         if (radeon_crtc->base.enabled && mode) {
7428                 if (mode->crtc_hdisplay < 1920) {
7429                         tmp = 1;
7430                         buffer_alloc = 2;
7431                 } else if (mode->crtc_hdisplay < 2560) {
7432                         tmp = 2;
7433                         buffer_alloc = 2;
7434                 } else if (mode->crtc_hdisplay < 4096) {
7435                         tmp = 0;
7436                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7437                 } else {
7438                         DRM_DEBUG_KMS("Mode too big for LB!\n");
7439                         tmp = 0;
7440                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7441                 }
7442         } else {
7443                 tmp = 1;
7444                 buffer_alloc = 0;
7445         }
7446
7447         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7448                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7449
7450         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7451                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7452         for (i = 0; i < rdev->usec_timeout; i++) {
7453                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7454                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
7455                         break;
7456                 udelay(1);
7457         }
7458
7459         if (radeon_crtc->base.enabled && mode) {
7460                 switch (tmp) {
7461                 case 0:
7462                 default:
7463                         return 4096 * 2;
7464                 case 1:
7465                         return 1920 * 2;
7466                 case 2:
7467                         return 2560 * 2;
7468                 }
7469         }
7470
7471         /* controller not enabled, so no lb used */
7472         return 0;
7473 }
7474
7475 /**
7476  * cik_get_number_of_dram_channels - get the number of dram channels
7477  *
7478  * @rdev: radeon_device pointer
7479  *
7480  * Look up the number of video ram channels (CIK).
7481  * Used for display watermark bandwidth calculations
7482  * Returns the number of dram channels
7483  */
7484 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7485 {
7486         u32 tmp = RREG32(MC_SHARED_CHMAP);
7487
7488         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7489         case 0:
7490         default:
7491                 return 1;
7492         case 1:
7493                 return 2;
7494         case 2:
7495                 return 4;
7496         case 3:
7497                 return 8;
7498         case 4:
7499                 return 3;
7500         case 5:
7501                 return 6;
7502         case 6:
7503                 return 10;
7504         case 7:
7505                 return 12;
7506         case 8:
7507                 return 16;
7508         }
7509 }
7510
/* Input parameters for the DCE8 display watermark calculations;
 * one instance is filled in per clock level (high/low) for each crtc.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7526
7527 /**
7528  * dce8_dram_bandwidth - get the dram bandwidth
7529  *
7530  * @wm: watermark calculation data
7531  *
7532  * Calculate the raw dram bandwidth (CIK).
7533  * Used for display watermark bandwidth calculations
7534  * Returns the dram bandwidth in MBytes/s
7535  */
7536 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7537 {
7538         /* Calculate raw DRAM Bandwidth */
7539         fixed20_12 dram_efficiency; /* 0.7 */
7540         fixed20_12 yclk, dram_channels, bandwidth;
7541         fixed20_12 a;
7542
7543         a.full = dfixed_const(1000);
7544         yclk.full = dfixed_const(wm->yclk);
7545         yclk.full = dfixed_div(yclk, a);
7546         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7547         a.full = dfixed_const(10);
7548         dram_efficiency.full = dfixed_const(7);
7549         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7550         bandwidth.full = dfixed_mul(dram_channels, yclk);
7551         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7552
7553         return dfixed_trunc(bandwidth);
7554 }
7555
7556 /**
7557  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7558  *
7559  * @wm: watermark calculation data
7560  *
7561  * Calculate the dram bandwidth used for display (CIK).
7562  * Used for display watermark bandwidth calculations
7563  * Returns the dram bandwidth for display in MBytes/s
7564  */
7565 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7566 {
7567         /* Calculate DRAM Bandwidth and the part allocated to display. */
7568         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7569         fixed20_12 yclk, dram_channels, bandwidth;
7570         fixed20_12 a;
7571
7572         a.full = dfixed_const(1000);
7573         yclk.full = dfixed_const(wm->yclk);
7574         yclk.full = dfixed_div(yclk, a);
7575         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7576         a.full = dfixed_const(10);
7577         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7578         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7579         bandwidth.full = dfixed_mul(dram_channels, yclk);
7580         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7581
7582         return dfixed_trunc(bandwidth);
7583 }
7584
7585 /**
7586  * dce8_data_return_bandwidth - get the data return bandwidth
7587  *
7588  * @wm: watermark calculation data
7589  *
7590  * Calculate the data return bandwidth used for display (CIK).
7591  * Used for display watermark bandwidth calculations
7592  * Returns the data return bandwidth in MBytes/s
7593  */
7594 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7595 {
7596         /* Calculate the display Data return Bandwidth */
7597         fixed20_12 return_efficiency; /* 0.8 */
7598         fixed20_12 sclk, bandwidth;
7599         fixed20_12 a;
7600
7601         a.full = dfixed_const(1000);
7602         sclk.full = dfixed_const(wm->sclk);
7603         sclk.full = dfixed_div(sclk, a);
7604         a.full = dfixed_const(10);
7605         return_efficiency.full = dfixed_const(8);
7606         return_efficiency.full = dfixed_div(return_efficiency, a);
7607         a.full = dfixed_const(32);
7608         bandwidth.full = dfixed_mul(a, sclk);
7609         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7610
7611         return dfixed_trunc(bandwidth);
7612 }
7613
7614 /**
7615  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7616  *
7617  * @wm: watermark calculation data
7618  *
7619  * Calculate the dmif bandwidth used for display (CIK).
7620  * Used for display watermark bandwidth calculations
7621  * Returns the dmif bandwidth in MBytes/s
7622  */
7623 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7624 {
7625         /* Calculate the DMIF Request Bandwidth */
7626         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7627         fixed20_12 disp_clk, bandwidth;
7628         fixed20_12 a, b;
7629
7630         a.full = dfixed_const(1000);
7631         disp_clk.full = dfixed_const(wm->disp_clk);
7632         disp_clk.full = dfixed_div(disp_clk, a);
7633         a.full = dfixed_const(32);
7634         b.full = dfixed_mul(a, disp_clk);
7635
7636         a.full = dfixed_const(10);
7637         disp_clk_request_efficiency.full = dfixed_const(8);
7638         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7639
7640         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7641
7642         return dfixed_trunc(bandwidth);
7643 }
7644
7645 /**
7646  * dce8_available_bandwidth - get the min available bandwidth
7647  *
7648  * @wm: watermark calculation data
7649  *
7650  * Calculate the min available bandwidth used for display (CIK).
7651  * Used for display watermark bandwidth calculations
7652  * Returns the min available bandwidth in MBytes/s
7653  */
7654 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7655 {
7656         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7657         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7658         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7659         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7660
7661         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7662 }
7663
7664 /**
7665  * dce8_average_bandwidth - get the average available bandwidth
7666  *
7667  * @wm: watermark calculation data
7668  *
7669  * Calculate the average available bandwidth used for display (CIK).
7670  * Used for display watermark bandwidth calculations
7671  * Returns the average available bandwidth in MBytes/s
7672  */
7673 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7674 {
7675         /* Calculate the display mode Average Bandwidth
7676          * DisplayMode should contain the source and destination dimensions,
7677          * timing, etc.
7678          */
7679         fixed20_12 bpp;
7680         fixed20_12 line_time;
7681         fixed20_12 src_width;
7682         fixed20_12 bandwidth;
7683         fixed20_12 a;
7684
7685         a.full = dfixed_const(1000);
7686         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7687         line_time.full = dfixed_div(line_time, a);
7688         bpp.full = dfixed_const(wm->bytes_per_pixel);
7689         src_width.full = dfixed_const(wm->src_width);
7690         bandwidth.full = dfixed_mul(src_width, bpp);
7691         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7692         bandwidth.full = dfixed_div(bandwidth, line_time);
7693
7694         return dfixed_trunc(bandwidth);
7695 }
7696
7697 /**
7698  * dce8_latency_watermark - get the latency watermark
7699  *
7700  * @wm: watermark calculation data
7701  *
7702  * Calculate the latency watermark (CIK).
7703  * Used for display watermark bandwidth calculations
7704  * Returns the latency watermark in ns
7705  */
7706 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7707 {
7708         /* First calculate the latency in ns */
7709         u32 mc_latency = 2000; /* 2000 ns. */
7710         u32 available_bandwidth = dce8_available_bandwidth(wm);
7711         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7712         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7713         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7714         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7715                 (wm->num_heads * cursor_line_pair_return_time);
7716         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7717         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7718         u32 tmp, dmif_size = 12288;
7719         fixed20_12 a, b, c;
7720
7721         if (wm->num_heads == 0)
7722                 return 0;
7723
7724         a.full = dfixed_const(2);
7725         b.full = dfixed_const(1);
7726         if ((wm->vsc.full > a.full) ||
7727             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7728             (wm->vtaps >= 5) ||
7729             ((wm->vsc.full >= a.full) && wm->interlaced))
7730                 max_src_lines_per_dst_line = 4;
7731         else
7732                 max_src_lines_per_dst_line = 2;
7733
7734         a.full = dfixed_const(available_bandwidth);
7735         b.full = dfixed_const(wm->num_heads);
7736         a.full = dfixed_div(a, b);
7737
7738         b.full = dfixed_const(mc_latency + 512);
7739         c.full = dfixed_const(wm->disp_clk);
7740         b.full = dfixed_div(b, c);
7741
7742         c.full = dfixed_const(dmif_size);
7743         b.full = dfixed_div(c, b);
7744
7745         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7746
7747         b.full = dfixed_const(1000);
7748         c.full = dfixed_const(wm->disp_clk);
7749         b.full = dfixed_div(c, b);
7750         c.full = dfixed_const(wm->bytes_per_pixel);
7751         b.full = dfixed_mul(b, c);
7752
7753         lb_fill_bw = min(tmp, dfixed_trunc(b));
7754
7755         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7756         b.full = dfixed_const(1000);
7757         c.full = dfixed_const(lb_fill_bw);
7758         b.full = dfixed_div(c, b);
7759         a.full = dfixed_div(a, b);
7760         line_fill_time = dfixed_trunc(a);
7761
7762         if (line_fill_time < wm->active_time)
7763                 return latency;
7764         else
7765                 return latency + (line_fill_time - wm->active_time);
7766
7767 }
7768
7769 /**
7770  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7771  * average and available dram bandwidth
7772  *
7773  * @wm: watermark calculation data
7774  *
7775  * Check if the display average bandwidth fits in the display
7776  * dram bandwidth (CIK).
7777  * Used for display watermark bandwidth calculations
7778  * Returns true if the display fits, false if not.
7779  */
7780 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7781 {
7782         if (dce8_average_bandwidth(wm) <=
7783             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7784                 return true;
7785         else
7786                 return false;
7787 }
7788
7789 /**
7790  * dce8_average_bandwidth_vs_available_bandwidth - check
7791  * average and available bandwidth
7792  *
7793  * @wm: watermark calculation data
7794  *
7795  * Check if the display average bandwidth fits in the display
7796  * available bandwidth (CIK).
7797  * Used for display watermark bandwidth calculations
7798  * Returns true if the display fits, false if not.
7799  */
7800 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7801 {
7802         if (dce8_average_bandwidth(wm) <=
7803             (dce8_available_bandwidth(wm) / wm->num_heads))
7804                 return true;
7805         else
7806                 return false;
7807 }
7808
7809 /**
7810  * dce8_check_latency_hiding - check latency hiding
7811  *
7812  * @wm: watermark calculation data
7813  *
7814  * Check latency hiding (CIK).
7815  * Used for display watermark bandwidth calculations
7816  * Returns true if the display fits, false if not.
7817  */
7818 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7819 {
7820         u32 lb_partitions = wm->lb_size / wm->src_width;
7821         u32 line_time = wm->active_time + wm->blank_time;
7822         u32 latency_tolerant_lines;
7823         u32 latency_hiding;
7824         fixed20_12 a;
7825
7826         a.full = dfixed_const(1);
7827         if (wm->vsc.full > a.full)
7828                 latency_tolerant_lines = 1;
7829         else {
7830                 if (lb_partitions <= (wm->vtaps + 1))
7831                         latency_tolerant_lines = 1;
7832                 else
7833                         latency_tolerant_lines = 2;
7834         }
7835
7836         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7837
7838         if (dce8_latency_watermark(wm) <= latency_hiding)
7839                 return true;
7840         else
7841                 return false;
7842 }
7843
7844 /**
7845  * dce8_program_watermarks - program display watermarks
7846  *
7847  * @rdev: radeon_device pointer
7848  * @radeon_crtc: the selected display controller
7849  * @lb_size: line buffer size
7850  * @num_heads: number of display controllers in use
7851  *
7852  * Calculate and program the display watermarks for the
7853  * selected display controller (CIK).
7854  */
7855 static void dce8_program_watermarks(struct radeon_device *rdev,
7856                                     struct radeon_crtc *radeon_crtc,
7857                                     u32 lb_size, u32 num_heads)
7858 {
7859         struct drm_display_mode *mode = &radeon_crtc->base.mode;
7860         struct dce8_wm_params wm_low, wm_high;
7861         u32 pixel_period;
7862         u32 line_time = 0;
7863         u32 latency_watermark_a = 0, latency_watermark_b = 0;
7864         u32 tmp, wm_mask;
7865
7866         if (radeon_crtc->base.enabled && num_heads && mode) {
7867                 pixel_period = 1000000 / (u32)mode->clock;
7868                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7869
7870                 /* watermark for high clocks */
7871                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7872                     rdev->pm.dpm_enabled) {
7873                         wm_high.yclk =
7874                                 radeon_dpm_get_mclk(rdev, false) * 10;
7875                         wm_high.sclk =
7876                                 radeon_dpm_get_sclk(rdev, false) * 10;
7877                 } else {
7878                         wm_high.yclk = rdev->pm.current_mclk * 10;
7879                         wm_high.sclk = rdev->pm.current_sclk * 10;
7880                 }
7881
7882                 wm_high.disp_clk = mode->clock;
7883                 wm_high.src_width = mode->crtc_hdisplay;
7884                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7885                 wm_high.blank_time = line_time - wm_high.active_time;
7886                 wm_high.interlaced = false;
7887                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7888                         wm_high.interlaced = true;
7889                 wm_high.vsc = radeon_crtc->vsc;
7890                 wm_high.vtaps = 1;
7891                 if (radeon_crtc->rmx_type != RMX_OFF)
7892                         wm_high.vtaps = 2;
7893                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7894                 wm_high.lb_size = lb_size;
7895                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7896                 wm_high.num_heads = num_heads;
7897
7898                 /* set for high clocks */
7899                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7900
7901                 /* possibly force display priority to high */
7902                 /* should really do this at mode validation time... */
7903                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7904                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7905                     !dce8_check_latency_hiding(&wm_high) ||
7906                     (rdev->disp_priority == 2)) {
7907                         DRM_DEBUG_KMS("force priority to high\n");
7908                 }
7909
7910                 /* watermark for low clocks */
7911                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7912                     rdev->pm.dpm_enabled) {
7913                         wm_low.yclk =
7914                                 radeon_dpm_get_mclk(rdev, true) * 10;
7915                         wm_low.sclk =
7916                                 radeon_dpm_get_sclk(rdev, true) * 10;
7917                 } else {
7918                         wm_low.yclk = rdev->pm.current_mclk * 10;
7919                         wm_low.sclk = rdev->pm.current_sclk * 10;
7920                 }
7921
7922                 wm_low.disp_clk = mode->clock;
7923                 wm_low.src_width = mode->crtc_hdisplay;
7924                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7925                 wm_low.blank_time = line_time - wm_low.active_time;
7926                 wm_low.interlaced = false;
7927                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7928                         wm_low.interlaced = true;
7929                 wm_low.vsc = radeon_crtc->vsc;
7930                 wm_low.vtaps = 1;
7931                 if (radeon_crtc->rmx_type != RMX_OFF)
7932                         wm_low.vtaps = 2;
7933                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7934                 wm_low.lb_size = lb_size;
7935                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7936                 wm_low.num_heads = num_heads;
7937
7938                 /* set for low clocks */
7939                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7940
7941                 /* possibly force display priority to high */
7942                 /* should really do this at mode validation time... */
7943                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7944                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7945                     !dce8_check_latency_hiding(&wm_low) ||
7946                     (rdev->disp_priority == 2)) {
7947                         DRM_DEBUG_KMS("force priority to high\n");
7948                 }
7949         }
7950
7951         /* select wm A */
7952         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7953         tmp = wm_mask;
7954         tmp &= ~LATENCY_WATERMARK_MASK(3);
7955         tmp |= LATENCY_WATERMARK_MASK(1);
7956         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7957         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7958                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7959                 LATENCY_HIGH_WATERMARK(line_time)));
7960         /* select wm B */
7961         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7962         tmp &= ~LATENCY_WATERMARK_MASK(3);
7963         tmp |= LATENCY_WATERMARK_MASK(2);
7964         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7965         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7966                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7967                 LATENCY_HIGH_WATERMARK(line_time)));
7968         /* restore original selection */
7969         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7970
7971         /* save values for DPM */
7972         radeon_crtc->line_time = line_time;
7973         radeon_crtc->wm_high = latency_watermark_a;
7974         radeon_crtc->wm_low = latency_watermark_b;
7975 }
7976
7977 /**
7978  * dce8_bandwidth_update - program display watermarks
7979  *
7980  * @rdev: radeon_device pointer
7981  *
7982  * Calculate and program the display watermarks and line
7983  * buffer allocation (CIK).
7984  */
7985 void dce8_bandwidth_update(struct radeon_device *rdev)
7986 {
7987         struct drm_display_mode *mode = NULL;
7988         u32 num_heads = 0, lb_size;
7989         int i;
7990
7991         radeon_update_display_priority(rdev);
7992
7993         for (i = 0; i < rdev->num_crtc; i++) {
7994                 if (rdev->mode_info.crtcs[i]->base.enabled)
7995                         num_heads++;
7996         }
7997         for (i = 0; i < rdev->num_crtc; i++) {
7998                 mode = &rdev->mode_info.crtcs[i]->base.mode;
7999                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8000                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8001         }
8002 }
8003
8004 /**
8005  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8006  *
8007  * @rdev: radeon_device pointer
8008  *
8009  * Fetches a GPU clock counter snapshot (SI).
8010  * Returns the 64 bit clock counter snapshot.
8011  */
8012 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8013 {
8014         uint64_t clock;
8015
8016         mutex_lock(&rdev->gpu_clock_mutex);
8017         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8018         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8019                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8020         mutex_unlock(&rdev->gpu_clock_mutex);
8021         return clock;
8022 }
8023
/* Program one UVD clock (vclk or dclk): look up the dividers via the
 * atom tables, write the post divider to @cntl_reg, then poll
 * @status_reg until the clock reports ready.
 * Returns 0 on success, the atom lookup error, or -ETIMEDOUT if the
 * status bit never asserted.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
                              u32 cntl_reg, u32 status_reg)
{
        int r, i;
        struct atom_clock_dividers dividers;
        uint32_t tmp;

        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
                                           clock, false, &dividers);
        if (r)
                return r;

        /* clear the directional-control enable and the old divider,
         * then plug in the new post divider */
        tmp = RREG32_SMC(cntl_reg);
        tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
        tmp |= dividers.post_divider;
        WREG32_SMC(cntl_reg, tmp);

        /* wait up to 100 * 10ms for the status bit */
        for (i = 0; i < 100; i++) {
                if (RREG32_SMC(status_reg) & DCLK_STATUS)
                        break;
                mdelay(10);
        }
        if (i == 100)
                return -ETIMEDOUT;

        return 0;
}
8051
8052 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8053 {
8054         int r = 0;
8055
8056         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8057         if (r)
8058                 return r;
8059
8060         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8061         return r;
8062 }
8063
/* Try to bring the PCIe link up to gen2/gen3 data rates when both the
 * GPU and the upstream bridge advertise support.  Can be disabled with
 * the radeon.pcie_gen2=0 module parameter; skipped entirely on IGPs and
 * non-PCIE parts.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
        struct pci_dev *root = rdev->pdev->bus->self;
        int bridge_pos, gpu_pos;
        u32 speed_cntl, mask, current_data_rate;
        int ret, i;
        u16 tmp16;

        /* user opted out via the module parameter */
        if (radeon_pcie_gen2 == 0)
                return;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
        if (ret != 0)
                return;

        /* nothing to do if neither 5.0 nor 8.0 GT/s is possible */
        if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
                return;

        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
                LC_CURRENT_DATA_RATE_SHIFT;
        if (mask & DRM_PCIE_SPEED_80) {
                /* data rate 2 == gen3 already active */
                if (current_data_rate == 2) {
                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
        } else if (mask & DRM_PCIE_SPEED_50) {
                /* data rate 1 == gen2 already active */
                if (current_data_rate == 1) {
                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
        }

        /* need the PCIe capability offset on both ends to poke LNKCTL */
        bridge_pos = pci_pcie_cap(root);
        if (!bridge_pos)
                return;

        gpu_pos = pci_pcie_cap(rdev->pdev);
        if (!gpu_pos)
                return;

        if (mask & DRM_PCIE_SPEED_80) {
                /* re-try equalization if gen3 is not already enabled */
                if (current_data_rate != 2) {
                        u16 bridge_cfg, gpu_cfg;
                        u16 bridge_cfg2, gpu_cfg2;
                        u32 max_lw, current_lw, tmp;

                        /* save both sides' LNKCTL so the HAWD bit can be
                         * restored after each retraining pass */
                        pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                        tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                        /* if the link trained narrower than the detected
                         * maximum width, ask for an upconfigure/renegotiate */
                        tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
                        current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

                        if (current_lw < max_lw) {
                                tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                                if (tmp & LC_RENEGOTIATION_SUPPORT) {
                                        tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
                                        tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
                                        tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
                                        WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
                                }
                        }

                        /* up to 10 equalization retries */
                        for (i = 0; i < 10; i++) {
                                /* check status */
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
                                if (tmp16 & PCI_EXP_DEVSTA_TRPND)
                                        break;

                                /* snapshot LNKCTL/LNKCTL2 on both ends so the
                                 * relevant bits can be restored below */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

                                /* quiesce the link and request a redo of
                                 * equalization */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_REDO_EQ;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                mdelay(100);

                                /* linkctl */
                                /* restore the saved HAWD bit on both ends */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                                /* linkctl2 */
                                /* restore the saved bits 4 and 9-11 of
                                 * LNKCTL2 on both ends */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

                                /* release the quiesce */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp &= ~LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
                        }
                }
        }

        /* set the link speed */
        speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
        speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* program the target link speed into the low nibble of LNKCTL2 */
        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
        tmp16 &= ~0xf;
        if (mask & DRM_PCIE_SPEED_80)
                tmp16 |= 3; /* gen3 */
        else if (mask & DRM_PCIE_SPEED_50)
                tmp16 |= 2; /* gen2 */
        else
                tmp16 |= 1; /* gen1 */
        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

        /* kick off the speed change */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* wait for the hardware to clear the initiate bit */
        for (i = 0; i < rdev->usec_timeout; i++) {
                speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
                if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
                        break;
                udelay(1);
        }
}
8220
/* Enable PCIE ASPM (L0s/L1) and associated PLL/clock power savings.
 * Disabled with the radeon.aspm=0 module parameter; skipped on IGPs
 * and non-PCIE parts.  The local disable_* bools act as compile-time
 * switches for the individual features.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
        u32 data, orig;
        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
        bool disable_clkreq = false;

        if (radeon_aspm == 0)
                return;

        /* XXX double check IGPs */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* override the number of fast training sequences transmitted.
         * note: each register below is only written back when the
         * read-modify-write actually changed it */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
        data &= ~LC_XMIT_N_FTS_MASK;
        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
        data |= LC_GO_TO_RECOVERY;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
        data |= P_IGNORE_EDB_ERR;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_P_CNTL, data);

        /* program the L0s/L1 inactivity timers; the write happens in
         * the !disable_l1 branch below, or in the else branch if L1
         * stays disabled */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
        data |= LC_PMI_TO_L1_DIS;
        if (!disable_l0s)
                data |= LC_L0S_INACTIVITY(7);

        if (!disable_l1) {
                data |= LC_L1_INACTIVITY(7);
                data &= ~LC_PMI_TO_L1_DIS;
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

                if (!disable_plloff_in_l1) {
                        bool clk_req_support;

                        /* allow the PIF PLLs to power down in the OFF and
                         * TXS2 states on both bifurcation blocks */
                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
                        data |= LC_DYN_LANES_PWR_STATE(3);
                        if (orig != data)
                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

                        if (!disable_clkreq) {
                                struct pci_dev *root = rdev->pdev->bus->self;
                                u32 lnkcap;

                                /* CLKREQ-based power-down only if the
                                 * upstream bridge advertises clock PM */
                                clk_req_support = false;
                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
                                        clk_req_support = true;
                        } else {
                                clk_req_support = false;
                        }

                        if (clk_req_support) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

                                /* switch the thermal monitor clocks */
                                orig = data = RREG32_SMC(THM_CLK_CNTL);
                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(THM_CLK_CNTL, data);

                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(MISC_CLK_CTRL, data);

                                /* stop using BCLK as XCLK / forced BIF refclk */
                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
                                data &= ~BCLK_AS_XCLK;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
                                data &= ~FORCE_BIF_REFCLK_EN;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);

                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
                                data &= ~MPLL_CLKOUT_SEL_MASK;
                                data |= MPLL_CLKOUT_SEL(4);
                                if (orig != data)
                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
                        }
                }
        } else {
                /* L1 disabled: commit the LC_CNTL value built above */
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
        }

        /* enable memory light sleep in the BIF */
        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_CNTL2, data);

        if (!disable_l0s) {
                /* if the N_FTS field is saturated and the link is reversed
                 * in both directions, turn the L0s inactivity timer back
                 * off -- presumably a hardware workaround (TODO: confirm
                 * against the register spec) */
                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
                                data &= ~LC_L0S_INACTIVITY_MASK;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
                        }
                }
        }
}