/*
 * Source: drivers/gpu/drm/radeon/cik.c from karo-tx-linux.git
 * (git.kernelconcepts.de), at merge of branch 'acpi-video-next'
 * into linux-next.
 */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
71                                  struct radeon_ib *ib,
72                                  uint64_t pe,
73                                  uint64_t addr, unsigned count,
74                                  uint32_t incr, uint32_t flags);
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
80 static void cik_fini_pg(struct radeon_device *rdev);
81 static void cik_fini_cg(struct radeon_device *rdev);
82 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
83                                           bool enable);
84
85 /* get temperature in millidegrees */
86 int ci_get_temp(struct radeon_device *rdev)
87 {
88         u32 temp;
89         int actual_temp = 0;
90
91         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
92                 CTF_TEMP_SHIFT;
93
94         if (temp & 0x200)
95                 actual_temp = 255;
96         else
97                 actual_temp = temp & 0x1ff;
98
99         actual_temp = actual_temp * 1000;
100
101         return actual_temp;
102 }
103
104 /* get temperature in millidegrees */
105 int kv_get_temp(struct radeon_device *rdev)
106 {
107         u32 temp;
108         int actual_temp = 0;
109
110         temp = RREG32_SMC(0xC0300E0C);
111
112         if (temp)
113                 actual_temp = (temp / 8) - 49;
114         else
115                 actual_temp = 0;
116
117         actual_temp = actual_temp * 1000;
118
119         return actual_temp;
120 }
121
122 /*
123  * Indirect registers accessor
124  */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset to read
 *
 * Writes the offset to PCIE_INDEX, then reads the value from PCIE_DATA.
 * The index/data pair is protected by pciep_idx_lock so concurrent
 * accessors cannot interleave and read through a stale index.
 * Returns the register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
137
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset to write
 * @v: value to write
 *
 * Writes the offset to PCIE_INDEX, then writes the value to PCIE_DATA,
 * under pciep_idx_lock so the index/data sequence stays atomic with
 * respect to other accessors.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* read back to post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
149
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX7 parts.
 * Entries are pairs: ((instance/SE select << 16) | (dword register
 * offset, i.e. byte offset >> 2)) followed by a 0x00000000 slot.
 * NOTE(review): the bare small constants (0x3, 0x5) below appear to
 * start new sections of the list and the trailing section carries no
 * value slots; the exact section semantics are defined by the RLC
 * microcode and are not visible here -- confirm against the ucode spec.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* section marker -- see NOTE(review) above */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* section marker: final entries carry no value slots */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
596
/*
 * RLC save/restore register list for Kalindi (Kabini) GFX7 parts.
 * Same layout as spectre_rlc_save_restore_register_list: pairs of
 * ((instance/SE select << 16) | (byte offset >> 2)) plus a 0x00000000
 * slot.  NOTE(review): the bare 0x3 / 0x5 constants appear to start
 * new sections whose semantics are defined by the RLC microcode, not
 * visible here -- confirm against the ucode spec.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* section marker -- see NOTE(review) above */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* section marker: final entries carry no value slots */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
921
/* SPM "golden" settings for Bonaire; rows appear to be
 * {reg offset, AND mask, OR value} triples consumed by
 * radeon_program_register_sequence() — TODO confirm triple layout there.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
926
/* Common "golden" settings for Bonaire; {reg offset, mask, value} triples
 * programmed by cik_init_golden_registers().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
934
/* Per-ASIC "golden" register settings for Bonaire; {reg offset, mask, value}
 * triples programmed by cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
979
/* Clock-gating init sequence for Bonaire (MGCG/CGCG per the name — presumably
 * medium/coarse grain clock gating; confirm against cikd.h register defs).
 * {reg offset, mask, value} triples programmed by cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1065
/* SPM "golden" settings for Spectre (Kaveri); {reg offset, mask, value}
 * triples programmed by cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1070
/* Common "golden" settings for Spectre (Kaveri); {reg offset, mask, value}
 * triples programmed by cik_init_golden_registers().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1078
/* Per-ASIC "golden" register settings for Spectre (Kaveri);
 * {reg offset, mask, value} triples programmed by cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1107
/* Clock-gating init sequence for Spectre (Kaveri); {reg offset, mask, value}
 * triples programmed by cik_init_golden_registers(). Covers more 0x3c0xx
 * entries than the Bonaire table (through 0x3c0bc).
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1198
/* SPM "golden" settings for Kalindi (Kabini); {reg offset, mask, value}
 * triples programmed by cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1203
/* Common "golden" settings for Kalindi (Kabini); {reg offset, mask, value}
 * triples programmed by cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1211
/* Per-ASIC "golden" register settings for Kalindi (Kabini);
 * {reg offset, mask, value} triples programmed by cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1245
/* Clock-gating init sequence for Kalindi (Kabini); {reg offset, mask, value}
 * triples programmed by cik_init_golden_registers(). Shorter 0x3c0xx range
 * than the Bonaire/Spectre tables.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1304
/**
 * cik_init_golden_registers - program the "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-family register tables above via
 * radeon_program_register_sequence(): clock-gating init first, then the
 * ASIC-specific, common, and SPM golden settings, in that order.
 * Unknown families are silently skipped.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}
1354
1355 /**
1356  * cik_get_xclk - get the xclk
1357  *
1358  * @rdev: radeon_device pointer
1359  *
1360  * Returns the reference clock used by the gfx engine
1361  * (CIK).
1362  */
1363 u32 cik_get_xclk(struct radeon_device *rdev)
1364 {
1365         u32 reference_clock = rdev->clock.spll.reference_freq;
1366
1367         if (rdev->flags & RADEON_IS_IGP) {
1368                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1369                         return reference_clock / 2;
1370         } else {
1371                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1372                         return reference_clock / 4;
1373         }
1374         return reference_clock;
1375 }
1376
1377 /**
1378  * cik_mm_rdoorbell - read a doorbell dword
1379  *
1380  * @rdev: radeon_device pointer
1381  * @offset: byte offset into the aperture
1382  *
1383  * Returns the value in the doorbell aperture at the
1384  * requested offset (CIK).
1385  */
1386 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1387 {
1388         if (offset < rdev->doorbell.size) {
1389                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1390         } else {
1391                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1392                 return 0;
1393         }
1394 }
1395
1396 /**
1397  * cik_mm_wdoorbell - write a doorbell dword
1398  *
1399  * @rdev: radeon_device pointer
1400  * @offset: byte offset into the aperture
1401  * @v: value to write
1402  *
1403  * Writes @v to the doorbell aperture at the
1404  * requested offset (CIK).
1405  */
1406 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1407 {
1408         if (offset < rdev->doorbell.size) {
1409                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1410         } else {
1411                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1412         }
1413 }
1414
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC IO debug register {index, data} pairs for Bonaire, written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before loading the MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1456
1457 /**
1458  * cik_srbm_select - select specific register instances
1459  *
1460  * @rdev: radeon_device pointer
1461  * @me: selected ME (micro engine)
1462  * @pipe: pipe
1463  * @queue: queue
1464  * @vmid: VMID
1465  *
1466  * Switches the currently active registers instances.  Some
1467  * registers are instanced per VMID, others are instanced per
1468  * me/pipe/queue combination.
1469  */
1470 static void cik_srbm_select(struct radeon_device *rdev,
1471                             u32 me, u32 pipe, u32 queue, u32 vmid)
1472 {
1473         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1474                              MEID(me & 0x3) |
1475                              VMID(vmid & 0xf) |
1476                              QUEUEID(queue & 0x7));
1477         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1478 }
1479
1480 /* ucode loading */
1481 /**
1482  * ci_mc_load_microcode - load MC ucode into the hw
1483  *
1484  * @rdev: radeon_device pointer
1485  *
1486  * Load the GDDR MC ucode into the hw (CIK).
1487  * Returns 0 on success, error on failure.
1488  */
1489 static int ci_mc_load_microcode(struct radeon_device *rdev)
1490 {
1491         const __be32 *fw_data;
1492         u32 running, blackout = 0;
1493         u32 *io_mc_regs;
1494         int i, ucode_size, regs_size;
1495
1496         if (!rdev->mc_fw)
1497                 return -EINVAL;
1498
1499         switch (rdev->family) {
1500         case CHIP_BONAIRE:
1501         default:
1502                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1503                 ucode_size = CIK_MC_UCODE_SIZE;
1504                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1505                 break;
1506         }
1507
1508         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1509
1510         if (running == 0) {
1511                 if (running) {
1512                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1513                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1514                 }
1515
1516                 /* reset the engine and set to writable */
1517                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1519
1520                 /* load mc io regs */
1521                 for (i = 0; i < regs_size; i++) {
1522                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1523                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1524                 }
1525                 /* load the MC ucode */
1526                 fw_data = (const __be32 *)rdev->mc_fw->data;
1527                 for (i = 0; i < ucode_size; i++)
1528                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1529
1530                 /* put the engine back into the active state */
1531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1532                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1533                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1534
1535                 /* wait for training to complete */
1536                 for (i = 0; i < rdev->usec_timeout; i++) {
1537                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1538                                 break;
1539                         udelay(1);
1540                 }
1541                 for (i = 0; i < rdev->usec_timeout; i++) {
1542                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1543                                 break;
1544                         udelay(1);
1545                 }
1546
1547                 if (running)
1548                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1549         }
1550
1551         return 0;
1552 }
1553
1554 /**
1555  * cik_init_microcode - load ucode images from disk
1556  *
1557  * @rdev: radeon_device pointer
1558  *
1559  * Use the firmware interface to load the ucode images into
1560  * the driver (not loaded into hw).
1561  * Returns 0 on success, error on failure.
1562  */
1563 static int cik_init_microcode(struct radeon_device *rdev)
1564 {
1565         const char *chip_name;
1566         size_t pfp_req_size, me_req_size, ce_req_size,
1567                 mec_req_size, rlc_req_size, mc_req_size,
1568                 sdma_req_size, smc_req_size;
1569         char fw_name[30];
1570         int err;
1571
1572         DRM_DEBUG("\n");
1573
1574         switch (rdev->family) {
1575         case CHIP_BONAIRE:
1576                 chip_name = "BONAIRE";
1577                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1582                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1583                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1584                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1585                 break;
1586         case CHIP_KAVERI:
1587                 chip_name = "KAVERI";
1588                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1589                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1590                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1591                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1592                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1593                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1594                 break;
1595         case CHIP_KABINI:
1596                 chip_name = "KABINI";
1597                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1598                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1599                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1600                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1601                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1602                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1603                 break;
1604         default: BUG();
1605         }
1606
1607         DRM_INFO("Loading %s Microcode\n", chip_name);
1608
1609         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1610         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1611         if (err)
1612                 goto out;
1613         if (rdev->pfp_fw->size != pfp_req_size) {
1614                 printk(KERN_ERR
1615                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1616                        rdev->pfp_fw->size, fw_name);
1617                 err = -EINVAL;
1618                 goto out;
1619         }
1620
1621         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1622         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1623         if (err)
1624                 goto out;
1625         if (rdev->me_fw->size != me_req_size) {
1626                 printk(KERN_ERR
1627                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628                        rdev->me_fw->size, fw_name);
1629                 err = -EINVAL;
1630         }
1631
1632         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1633         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1634         if (err)
1635                 goto out;
1636         if (rdev->ce_fw->size != ce_req_size) {
1637                 printk(KERN_ERR
1638                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639                        rdev->ce_fw->size, fw_name);
1640                 err = -EINVAL;
1641         }
1642
1643         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1644         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1645         if (err)
1646                 goto out;
1647         if (rdev->mec_fw->size != mec_req_size) {
1648                 printk(KERN_ERR
1649                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1650                        rdev->mec_fw->size, fw_name);
1651                 err = -EINVAL;
1652         }
1653
1654         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1655         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1656         if (err)
1657                 goto out;
1658         if (rdev->rlc_fw->size != rlc_req_size) {
1659                 printk(KERN_ERR
1660                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1661                        rdev->rlc_fw->size, fw_name);
1662                 err = -EINVAL;
1663         }
1664
1665         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1666         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1667         if (err)
1668                 goto out;
1669         if (rdev->sdma_fw->size != sdma_req_size) {
1670                 printk(KERN_ERR
1671                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1672                        rdev->sdma_fw->size, fw_name);
1673                 err = -EINVAL;
1674         }
1675
1676         /* No SMC, MC ucode on APUs */
1677         if (!(rdev->flags & RADEON_IS_IGP)) {
1678                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1679                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1680                 if (err)
1681                         goto out;
1682                 if (rdev->mc_fw->size != mc_req_size) {
1683                         printk(KERN_ERR
1684                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1685                                rdev->mc_fw->size, fw_name);
1686                         err = -EINVAL;
1687                 }
1688
1689                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1690                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1691                 if (err) {
1692                         printk(KERN_ERR
1693                                "smc: error loading firmware \"%s\"\n",
1694                                fw_name);
1695                         release_firmware(rdev->smc_fw);
1696                         rdev->smc_fw = NULL;
1697                 } else if (rdev->smc_fw->size != smc_req_size) {
1698                         printk(KERN_ERR
1699                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1700                                rdev->smc_fw->size, fw_name);
1701                         err = -EINVAL;
1702                 }
1703         }
1704
1705 out:
1706         if (err) {
1707                 if (err != -EINVAL)
1708                         printk(KERN_ERR
1709                                "cik_cp: Failed to load firmware \"%s\"\n",
1710                                fw_name);
1711                 release_firmware(rdev->pfp_fw);
1712                 rdev->pfp_fw = NULL;
1713                 release_firmware(rdev->me_fw);
1714                 rdev->me_fw = NULL;
1715                 release_firmware(rdev->ce_fw);
1716                 rdev->ce_fw = NULL;
1717                 release_firmware(rdev->rlc_fw);
1718                 rdev->rlc_fw = NULL;
1719                 release_firmware(rdev->mc_fw);
1720                 rdev->mc_fw = NULL;
1721                 release_firmware(rdev->smc_fw);
1722                 rdev->smc_fw = NULL;
1723         }
1724         return err;
1725 }
1726
1727 /*
1728  * Core functions
1729  */
1730 /**
1731  * cik_tiling_mode_table_init - init the hw tiling table
1732  *
1733  * @rdev: radeon_device pointer
1734  *
1735  * Starting with SI, the tiling setup is done globally in a
1736  * set of 32 tiling modes.  Rather than selecting each set of
1737  * parameters per surface as on older asics, we just select
1738  * which index in the tiling table we want to use, and the
1739  * surface uses those parameters (CIK).
1740  */
1741 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1742 {
1743         const u32 num_tile_mode_states = 32;
1744         const u32 num_secondary_tile_mode_states = 16;
1745         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1746         u32 num_pipe_configs;
1747         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1748                 rdev->config.cik.max_shader_engines;
1749
1750         switch (rdev->config.cik.mem_row_size_in_kb) {
1751         case 1:
1752                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1753                 break;
1754         case 2:
1755         default:
1756                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1757                 break;
1758         case 4:
1759                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1760                 break;
1761         }
1762
1763         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1764         if (num_pipe_configs > 8)
1765                 num_pipe_configs = 8; /* ??? */
1766
1767         if (num_pipe_configs == 8) {
1768                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1769                         switch (reg_offset) {
1770                         case 0:
1771                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1772                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1773                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1774                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1775                                 break;
1776                         case 1:
1777                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1778                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1779                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1780                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1781                                 break;
1782                         case 2:
1783                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1784                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1785                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1786                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1787                                 break;
1788                         case 3:
1789                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1791                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1792                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1793                                 break;
1794                         case 4:
1795                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1796                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1797                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1798                                                  TILE_SPLIT(split_equal_to_row_size));
1799                                 break;
1800                         case 5:
1801                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1802                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1803                                 break;
1804                         case 6:
1805                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1806                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1807                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1808                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1809                                 break;
1810                         case 7:
1811                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1812                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1813                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1814                                                  TILE_SPLIT(split_equal_to_row_size));
1815                                 break;
1816                         case 8:
1817                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1818                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1819                                 break;
1820                         case 9:
1821                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1822                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1823                                 break;
1824                         case 10:
1825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1826                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1827                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1828                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1829                                 break;
1830                         case 11:
1831                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1832                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1833                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1834                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1835                                 break;
1836                         case 12:
1837                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1838                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1839                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1840                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1841                                 break;
1842                         case 13:
1843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1845                                 break;
1846                         case 14:
1847                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1848                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1849                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1850                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1851                                 break;
1852                         case 16:
1853                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1854                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1855                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1856                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1857                                 break;
1858                         case 17:
1859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1860                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1861                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1862                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1863                                 break;
1864                         case 27:
1865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1867                                 break;
1868                         case 28:
1869                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1871                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1872                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1873                                 break;
1874                         case 29:
1875                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1876                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1877                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1878                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1879                                 break;
1880                         case 30:
1881                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1882                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1883                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1884                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1885                                 break;
1886                         default:
1887                                 gb_tile_moden = 0;
1888                                 break;
1889                         }
1890                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1891                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1892                 }
1893                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1894                         switch (reg_offset) {
1895                         case 0:
1896                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1897                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1898                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1899                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1900                                 break;
1901                         case 1:
1902                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1903                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1904                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1905                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1906                                 break;
1907                         case 2:
1908                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1909                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1910                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1911                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1912                                 break;
1913                         case 3:
1914                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1917                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1918                                 break;
1919                         case 4:
1920                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1923                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1924                                 break;
1925                         case 5:
1926                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1927                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1928                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1929                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1930                                 break;
1931                         case 6:
1932                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1935                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1936                                 break;
1937                         case 8:
1938                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1939                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1940                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1941                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1942                                 break;
1943                         case 9:
1944                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1947                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1948                                 break;
1949                         case 10:
1950                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1953                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1954                                 break;
1955                         case 11:
1956                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1957                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1958                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1959                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1960                                 break;
1961                         case 12:
1962                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1963                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1964                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1965                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1966                                 break;
1967                         case 13:
1968                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1970                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1971                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1972                                 break;
1973                         case 14:
1974                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1977                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1978                                 break;
1979                         default:
1980                                 gb_tile_moden = 0;
1981                                 break;
1982                         }
1983                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1984                 }
1985         } else if (num_pipe_configs == 4) {
1986                 if (num_rbs == 4) {
1987                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1988                                 switch (reg_offset) {
1989                                 case 0:
1990                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1991                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1992                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1993                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1994                                         break;
1995                                 case 1:
1996                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1997                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1998                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1999                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2000                                         break;
2001                                 case 2:
2002                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2003                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2004                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2005                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2006                                         break;
2007                                 case 3:
2008                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2009                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2010                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2011                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2012                                         break;
2013                                 case 4:
2014                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2015                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2016                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2017                                                          TILE_SPLIT(split_equal_to_row_size));
2018                                         break;
2019                                 case 5:
2020                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2021                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2022                                         break;
2023                                 case 6:
2024                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2025                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2027                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2028                                         break;
2029                                 case 7:
2030                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2031                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2032                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2033                                                          TILE_SPLIT(split_equal_to_row_size));
2034                                         break;
2035                                 case 8:
2036                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2037                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2038                                         break;
2039                                 case 9:
2040                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2041                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2042                                         break;
2043                                 case 10:
2044                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2046                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2047                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2048                                         break;
2049                                 case 11:
2050                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2051                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2052                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2053                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2054                                         break;
2055                                 case 12:
2056                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2057                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2058                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2059                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2060                                         break;
2061                                 case 13:
2062                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2063                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2064                                         break;
2065                                 case 14:
2066                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2067                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2068                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                                         break;
2071                                 case 16:
2072                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2073                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2074                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2075                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076                                         break;
2077                                 case 17:
2078                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2079                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2080                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2081                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2082                                         break;
2083                                 case 27:
2084                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2086                                         break;
2087                                 case 28:
2088                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2090                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2091                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                                         break;
2093                                 case 29:
2094                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2096                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2097                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2098                                         break;
2099                                 case 30:
2100                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2101                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2102                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2103                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2104                                         break;
2105                                 default:
2106                                         gb_tile_moden = 0;
2107                                         break;
2108                                 }
2109                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2110                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2111                         }
2112                 } else if (num_rbs < 4) {
2113                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2114                                 switch (reg_offset) {
2115                                 case 0:
2116                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2118                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2119                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2120                                         break;
2121                                 case 1:
2122                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2124                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2125                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2126                                         break;
2127                                 case 2:
2128                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2130                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2131                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2132                                         break;
2133                                 case 3:
2134                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2136                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2137                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2138                                         break;
2139                                 case 4:
2140                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2142                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2143                                                          TILE_SPLIT(split_equal_to_row_size));
2144                                         break;
2145                                 case 5:
2146                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2147                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148                                         break;
2149                                 case 6:
2150                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2151                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2152                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2153                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2154                                         break;
2155                                 case 7:
2156                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2157                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2158                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2159                                                          TILE_SPLIT(split_equal_to_row_size));
2160                                         break;
2161                                 case 8:
2162                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2164                                         break;
2165                                 case 9:
2166                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2168                                         break;
2169                                 case 10:
2170                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2172                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2173                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174                                         break;
2175                                 case 11:
2176                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2177                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2178                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2179                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180                                         break;
2181                                 case 12:
2182                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2183                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2185                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                                         break;
2187                                 case 13:
2188                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2189                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2190                                         break;
2191                                 case 14:
2192                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2193                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2195                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196                                         break;
2197                                 case 16:
2198                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2199                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2201                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                                         break;
2203                                 case 17:
2204                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2205                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2207                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2208                                         break;
2209                                 case 27:
2210                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2211                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2212                                         break;
2213                                 case 28:
2214                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2215                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2216                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2217                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218                                         break;
2219                                 case 29:
2220                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2221                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2222                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2223                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2224                                         break;
2225                                 case 30:
2226                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2227                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2228                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2229                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2230                                         break;
2231                                 default:
2232                                         gb_tile_moden = 0;
2233                                         break;
2234                                 }
2235                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2236                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2237                         }
2238                 }
2239                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2240                         switch (reg_offset) {
2241                         case 0:
2242                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2246                                 break;
2247                         case 1:
2248                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2250                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2252                                 break;
2253                         case 2:
2254                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2256                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2258                                 break;
2259                         case 3:
2260                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2264                                 break;
2265                         case 4:
2266                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2270                                 break;
2271                         case 5:
2272                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2276                                 break;
2277                         case 6:
2278                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2281                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2282                                 break;
2283                         case 8:
2284                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2288                                 break;
2289                         case 9:
2290                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2291                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2292                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2294                                 break;
2295                         case 10:
2296                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2298                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2300                                 break;
2301                         case 11:
2302                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2304                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2305                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2306                                 break;
2307                         case 12:
2308                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2311                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2312                                 break;
2313                         case 13:
2314                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2316                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2317                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2318                                 break;
2319                         case 14:
2320                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2321                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2322                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2323                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2324                                 break;
2325                         default:
2326                                 gb_tile_moden = 0;
2327                                 break;
2328                         }
2329                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2330                 }
2331         } else if (num_pipe_configs == 2) {
2332                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2333                         switch (reg_offset) {
2334                         case 0:
2335                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2337                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2338                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2339                                 break;
2340                         case 1:
2341                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2342                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2343                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2344                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2345                                 break;
2346                         case 2:
2347                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2348                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2349                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2350                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2351                                 break;
2352                         case 3:
2353                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2355                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2356                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2357                                 break;
2358                         case 4:
2359                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2361                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2362                                                  TILE_SPLIT(split_equal_to_row_size));
2363                                 break;
2364                         case 5:
2365                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2367                                 break;
2368                         case 6:
2369                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2370                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2372                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2373                                 break;
2374                         case 7:
2375                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2376                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2377                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2378                                                  TILE_SPLIT(split_equal_to_row_size));
2379                                 break;
2380                         case 8:
2381                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2382                                 break;
2383                         case 9:
2384                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2385                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2386                                 break;
2387                         case 10:
2388                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2389                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2390                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2391                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392                                 break;
2393                         case 11:
2394                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2395                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2396                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2397                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398                                 break;
2399                         case 12:
2400                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2401                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2402                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2403                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                                 break;
2405                         case 13:
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408                                 break;
2409                         case 14:
2410                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2413                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                                 break;
2415                         case 16:
2416                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2417                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2419                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                                 break;
2421                         case 17:
2422                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2423                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2424                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2425                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2426                                 break;
2427                         case 27:
2428                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2430                                 break;
2431                         case 28:
2432                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2433                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2435                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                                 break;
2437                         case 29:
2438                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2441                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442                                 break;
2443                         case 30:
2444                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2447                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448                                 break;
2449                         default:
2450                                 gb_tile_moden = 0;
2451                                 break;
2452                         }
2453                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2454                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2455                 }
2456                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2457                         switch (reg_offset) {
2458                         case 0:
2459                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2460                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2462                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2463                                 break;
2464                         case 1:
2465                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2466                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2468                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2469                                 break;
2470                         case 2:
2471                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2473                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2474                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2475                                 break;
2476                         case 3:
2477                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2480                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2481                                 break;
2482                         case 4:
2483                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2485                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2486                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2487                                 break;
2488                         case 5:
2489                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2492                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2493                                 break;
2494                         case 6:
2495                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2498                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2499                                 break;
2500                         case 8:
2501                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2502                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2503                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2504                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2505                                 break;
2506                         case 9:
2507                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2508                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2511                                 break;
2512                         case 10:
2513                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2514                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2516                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2517                                 break;
2518                         case 11:
2519                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2520                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2521                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2522                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2523                                 break;
2524                         case 12:
2525                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2527                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2528                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2529                                 break;
2530                         case 13:
2531                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2535                                 break;
2536                         case 14:
2537                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2539                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2540                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2541                                 break;
2542                         default:
2543                                 gb_tile_moden = 0;
2544                                 break;
2545                         }
2546                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2547                 }
2548         } else
2549                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2550 }
2551
2552 /**
2553  * cik_select_se_sh - select which SE, SH to address
2554  *
2555  * @rdev: radeon_device pointer
2556  * @se_num: shader engine to address
2557  * @sh_num: sh block to address
2558  *
2559  * Select which SE, SH combinations to address. Certain
2560  * registers are instanced per SE or SH.  0xffffffff means
2561  * broadcast to all SEs or SHs (CIK).
2562  */
2563 static void cik_select_se_sh(struct radeon_device *rdev,
2564                              u32 se_num, u32 sh_num)
2565 {
2566         u32 data = INSTANCE_BROADCAST_WRITES;
2567
2568         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2569                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2570         else if (se_num == 0xffffffff)
2571                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2572         else if (sh_num == 0xffffffff)
2573                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2574         else
2575                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2576         WREG32(GRBM_GFX_INDEX, data);
2577 }
2578
2579 /**
2580  * cik_create_bitmask - create a bitmask
2581  *
2582  * @bit_width: length of the mask
2583  *
2584  * create a variable length bit mask (CIK).
2585  * Returns the bitmask.
2586  */
2587 static u32 cik_create_bitmask(u32 bit_width)
2588 {
2589         u32 i, mask = 0;
2590
2591         for (i = 0; i < bit_width; i++) {
2592                 mask <<= 1;
2593                 mask |= 1;
2594         }
2595         return mask;
2596 }
2597
2598 /**
2599  * cik_select_se_sh - select which SE, SH to address
2600  *
2601  * @rdev: radeon_device pointer
2602  * @max_rb_num: max RBs (render backends) for the asic
2603  * @se_num: number of SEs (shader engines) for the asic
2604  * @sh_per_se: number of SH blocks per SE for the asic
2605  *
2606  * Calculates the bitmask of disabled RBs (CIK).
2607  * Returns the disabled RB bitmask.
2608  */
2609 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2610                               u32 max_rb_num, u32 se_num,
2611                               u32 sh_per_se)
2612 {
2613         u32 data, mask;
2614
2615         data = RREG32(CC_RB_BACKEND_DISABLE);
2616         if (data & 1)
2617                 data &= BACKEND_DISABLE_MASK;
2618         else
2619                 data = 0;
2620         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2621
2622         data >>= BACKEND_DISABLE_SHIFT;
2623
2624         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2625
2626         return data & mask;
2627 }
2628
2629 /**
2630  * cik_setup_rb - setup the RBs on the asic
2631  *
2632  * @rdev: radeon_device pointer
2633  * @se_num: number of SEs (shader engines) for the asic
2634  * @sh_per_se: number of SH blocks per SE for the asic
2635  * @max_rb_num: max RBs (render backends) for the asic
2636  *
2637  * Configures per-SE/SH RB registers (CIK).
2638  */
2639 static void cik_setup_rb(struct radeon_device *rdev,
2640                          u32 se_num, u32 sh_per_se,
2641                          u32 max_rb_num)
2642 {
2643         int i, j;
2644         u32 data, mask;
2645         u32 disabled_rbs = 0;
2646         u32 enabled_rbs = 0;
2647
2648         for (i = 0; i < se_num; i++) {
2649                 for (j = 0; j < sh_per_se; j++) {
2650                         cik_select_se_sh(rdev, i, j);
2651                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2652                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2653                 }
2654         }
2655         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2656
2657         mask = 1;
2658         for (i = 0; i < max_rb_num; i++) {
2659                 if (!(disabled_rbs & mask))
2660                         enabled_rbs |= mask;
2661                 mask <<= 1;
2662         }
2663
2664         for (i = 0; i < se_num; i++) {
2665                 cik_select_se_sh(rdev, i, 0xffffffff);
2666                 data = 0;
2667                 for (j = 0; j < sh_per_se; j++) {
2668                         switch (enabled_rbs & 3) {
2669                         case 1:
2670                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2671                                 break;
2672                         case 2:
2673                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2674                                 break;
2675                         case 3:
2676                         default:
2677                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2678                                 break;
2679                         }
2680                         enabled_rbs >>= 2;
2681                 }
2682                 WREG32(PA_SC_RASTER_CONFIG, data);
2683         }
2684         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2685 }
2686
2687 /**
2688  * cik_gpu_init - setup the 3D engine
2689  *
2690  * @rdev: radeon_device pointer
2691  *
2692  * Configures the 3D engine and tiling configuration
2693  * registers so that the 3D engine is usable.
2694  */
2695 static void cik_gpu_init(struct radeon_device *rdev)
2696 {
2697         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2698         u32 mc_shared_chmap, mc_arb_ramcfg;
2699         u32 hdp_host_path_cntl;
2700         u32 tmp;
2701         int i, j;
2702
2703         switch (rdev->family) {
2704         case CHIP_BONAIRE:
2705                 rdev->config.cik.max_shader_engines = 2;
2706                 rdev->config.cik.max_tile_pipes = 4;
2707                 rdev->config.cik.max_cu_per_sh = 7;
2708                 rdev->config.cik.max_sh_per_se = 1;
2709                 rdev->config.cik.max_backends_per_se = 2;
2710                 rdev->config.cik.max_texture_channel_caches = 4;
2711                 rdev->config.cik.max_gprs = 256;
2712                 rdev->config.cik.max_gs_threads = 32;
2713                 rdev->config.cik.max_hw_contexts = 8;
2714
2715                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2716                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2717                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2718                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2719                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2720                 break;
2721         case CHIP_KAVERI:
2722                 rdev->config.cik.max_shader_engines = 1;
2723                 rdev->config.cik.max_tile_pipes = 4;
2724                 if ((rdev->pdev->device == 0x1304) ||
2725                     (rdev->pdev->device == 0x1305) ||
2726                     (rdev->pdev->device == 0x130C) ||
2727                     (rdev->pdev->device == 0x130F) ||
2728                     (rdev->pdev->device == 0x1310) ||
2729                     (rdev->pdev->device == 0x1311) ||
2730                     (rdev->pdev->device == 0x131C)) {
2731                         rdev->config.cik.max_cu_per_sh = 8;
2732                         rdev->config.cik.max_backends_per_se = 2;
2733                 } else if ((rdev->pdev->device == 0x1309) ||
2734                            (rdev->pdev->device == 0x130A) ||
2735                            (rdev->pdev->device == 0x130D) ||
2736                            (rdev->pdev->device == 0x1313) ||
2737                            (rdev->pdev->device == 0x131D)) {
2738                         rdev->config.cik.max_cu_per_sh = 6;
2739                         rdev->config.cik.max_backends_per_se = 2;
2740                 } else if ((rdev->pdev->device == 0x1306) ||
2741                            (rdev->pdev->device == 0x1307) ||
2742                            (rdev->pdev->device == 0x130B) ||
2743                            (rdev->pdev->device == 0x130E) ||
2744                            (rdev->pdev->device == 0x1315) ||
2745                            (rdev->pdev->device == 0x131B)) {
2746                         rdev->config.cik.max_cu_per_sh = 4;
2747                         rdev->config.cik.max_backends_per_se = 1;
2748                 } else {
2749                         rdev->config.cik.max_cu_per_sh = 3;
2750                         rdev->config.cik.max_backends_per_se = 1;
2751                 }
2752                 rdev->config.cik.max_sh_per_se = 1;
2753                 rdev->config.cik.max_texture_channel_caches = 4;
2754                 rdev->config.cik.max_gprs = 256;
2755                 rdev->config.cik.max_gs_threads = 16;
2756                 rdev->config.cik.max_hw_contexts = 8;
2757
2758                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2759                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2760                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2761                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2762                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2763                 break;
2764         case CHIP_KABINI:
2765         default:
2766                 rdev->config.cik.max_shader_engines = 1;
2767                 rdev->config.cik.max_tile_pipes = 2;
2768                 rdev->config.cik.max_cu_per_sh = 2;
2769                 rdev->config.cik.max_sh_per_se = 1;
2770                 rdev->config.cik.max_backends_per_se = 1;
2771                 rdev->config.cik.max_texture_channel_caches = 2;
2772                 rdev->config.cik.max_gprs = 256;
2773                 rdev->config.cik.max_gs_threads = 16;
2774                 rdev->config.cik.max_hw_contexts = 8;
2775
2776                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2777                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2778                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2779                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2780                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2781                 break;
2782         }
2783
2784         /* Initialize HDP */
2785         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2786                 WREG32((0x2c14 + j), 0x00000000);
2787                 WREG32((0x2c18 + j), 0x00000000);
2788                 WREG32((0x2c1c + j), 0x00000000);
2789                 WREG32((0x2c20 + j), 0x00000000);
2790                 WREG32((0x2c24 + j), 0x00000000);
2791         }
2792
2793         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2794
2795         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2796
2797         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2798         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2799
2800         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2801         rdev->config.cik.mem_max_burst_length_bytes = 256;
2802         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2803         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2804         if (rdev->config.cik.mem_row_size_in_kb > 4)
2805                 rdev->config.cik.mem_row_size_in_kb = 4;
2806         /* XXX use MC settings? */
2807         rdev->config.cik.shader_engine_tile_size = 32;
2808         rdev->config.cik.num_gpus = 1;
2809         rdev->config.cik.multi_gpu_tile_size = 64;
2810
2811         /* fix up row size */
2812         gb_addr_config &= ~ROW_SIZE_MASK;
2813         switch (rdev->config.cik.mem_row_size_in_kb) {
2814         case 1:
2815         default:
2816                 gb_addr_config |= ROW_SIZE(0);
2817                 break;
2818         case 2:
2819                 gb_addr_config |= ROW_SIZE(1);
2820                 break;
2821         case 4:
2822                 gb_addr_config |= ROW_SIZE(2);
2823                 break;
2824         }
2825
2826         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2827          * not have bank info, so create a custom tiling dword.
2828          * bits 3:0   num_pipes
2829          * bits 7:4   num_banks
2830          * bits 11:8  group_size
2831          * bits 15:12 row_size
2832          */
2833         rdev->config.cik.tile_config = 0;
2834         switch (rdev->config.cik.num_tile_pipes) {
2835         case 1:
2836                 rdev->config.cik.tile_config |= (0 << 0);
2837                 break;
2838         case 2:
2839                 rdev->config.cik.tile_config |= (1 << 0);
2840                 break;
2841         case 4:
2842                 rdev->config.cik.tile_config |= (2 << 0);
2843                 break;
2844         case 8:
2845         default:
2846                 /* XXX what about 12? */
2847                 rdev->config.cik.tile_config |= (3 << 0);
2848                 break;
2849         }
2850         rdev->config.cik.tile_config |=
2851                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2852         rdev->config.cik.tile_config |=
2853                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2854         rdev->config.cik.tile_config |=
2855                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2856
2857         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2858         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2859         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2860         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2861         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2862         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2863         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2864         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2865
2866         cik_tiling_mode_table_init(rdev);
2867
2868         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2869                      rdev->config.cik.max_sh_per_se,
2870                      rdev->config.cik.max_backends_per_se);
2871
2872         /* set HW defaults for 3D engine */
2873         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2874
2875         WREG32(SX_DEBUG_1, 0x20);
2876
2877         WREG32(TA_CNTL_AUX, 0x00010000);
2878
2879         tmp = RREG32(SPI_CONFIG_CNTL);
2880         tmp |= 0x03000000;
2881         WREG32(SPI_CONFIG_CNTL, tmp);
2882
2883         WREG32(SQ_CONFIG, 1);
2884
2885         WREG32(DB_DEBUG, 0);
2886
2887         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2888         tmp |= 0x00000400;
2889         WREG32(DB_DEBUG2, tmp);
2890
2891         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2892         tmp |= 0x00020200;
2893         WREG32(DB_DEBUG3, tmp);
2894
2895         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2896         tmp |= 0x00018208;
2897         WREG32(CB_HW_CONTROL, tmp);
2898
2899         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2900
2901         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2902                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2903                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2904                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2905
2906         WREG32(VGT_NUM_INSTANCES, 1);
2907
2908         WREG32(CP_PERFMON_CNTL, 0);
2909
2910         WREG32(SQ_CONFIG, 0);
2911
2912         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2913                                           FORCE_EOV_MAX_REZ_CNT(255)));
2914
2915         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2916                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2917
2918         WREG32(VGT_GS_VERTEX_REUSE, 16);
2919         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2920
2921         tmp = RREG32(HDP_MISC_CNTL);
2922         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2923         WREG32(HDP_MISC_CNTL, tmp);
2924
2925         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2926         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2927
2928         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2929         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2930
2931         udelay(50);
2932 }
2933
2934 /*
2935  * GPU scratch registers helpers function.
2936  */
2937 /**
2938  * cik_scratch_init - setup driver info for CP scratch regs
2939  *
2940  * @rdev: radeon_device pointer
2941  *
2942  * Set up the number and offset of the CP scratch registers.
2943  * NOTE: use of CP scratch registers is a legacy inferface and
2944  * is not used by default on newer asics (r6xx+).  On newer asics,
2945  * memory buffers are used for fences rather than scratch regs.
2946  */
2947 static void cik_scratch_init(struct radeon_device *rdev)
2948 {
2949         int i;
2950
2951         rdev->scratch.num_reg = 7;
2952         rdev->scratch.reg_base = SCRATCH_REG0;
2953         for (i = 0; i < rdev->scratch.num_reg; i++) {
2954                 rdev->scratch.free[i] = true;
2955                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2956         }
2957 }
2958
2959 /**
2960  * cik_ring_test - basic gfx ring test
2961  *
2962  * @rdev: radeon_device pointer
2963  * @ring: radeon_ring structure holding ring information
2964  *
2965  * Allocate a scratch register and write to it using the gfx ring (CIK).
2966  * Provides a basic gfx ring test to verify that the ring is working.
2967  * Used by cik_cp_gfx_resume();
2968  * Returns 0 on success, error on failure.
2969  */
2970 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2971 {
2972         uint32_t scratch;
2973         uint32_t tmp = 0;
2974         unsigned i;
2975         int r;
2976
2977         r = radeon_scratch_get(rdev, &scratch);
2978         if (r) {
2979                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2980                 return r;
2981         }
2982         WREG32(scratch, 0xCAFEDEAD);
2983         r = radeon_ring_lock(rdev, ring, 3);
2984         if (r) {
2985                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2986                 radeon_scratch_free(rdev, scratch);
2987                 return r;
2988         }
2989         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2990         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2991         radeon_ring_write(ring, 0xDEADBEEF);
2992         radeon_ring_unlock_commit(rdev, ring);
2993
2994         for (i = 0; i < rdev->usec_timeout; i++) {
2995                 tmp = RREG32(scratch);
2996                 if (tmp == 0xDEADBEEF)
2997                         break;
2998                 DRM_UDELAY(1);
2999         }
3000         if (i < rdev->usec_timeout) {
3001                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3002         } else {
3003                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3004                           ring->idx, scratch, tmp);
3005                 r = -EINVAL;
3006         }
3007         radeon_scratch_free(rdev, scratch);
3008         return r;
3009 }
3010
3011 /**
3012  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3013  *
3014  * @rdev: radeon_device pointer
3015  * @fence: radeon fence object
3016  *
3017  * Emits a fence sequnce number on the gfx ring and flushes
3018  * GPU caches.
3019  */
3020 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3021                              struct radeon_fence *fence)
3022 {
3023         struct radeon_ring *ring = &rdev->ring[fence->ring];
3024         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3025
3026         /* EVENT_WRITE_EOP - flush caches, send int */
3027         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3028         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3029                                  EOP_TC_ACTION_EN |
3030                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3031                                  EVENT_INDEX(5)));
3032         radeon_ring_write(ring, addr & 0xfffffffc);
3033         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3034         radeon_ring_write(ring, fence->seq);
3035         radeon_ring_write(ring, 0);
3036         /* HDP flush */
3037         /* We should be using the new WAIT_REG_MEM special op packet here
3038          * but it causes the CP to hang
3039          */
3040         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3041         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3042                                  WRITE_DATA_DST_SEL(0)));
3043         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3044         radeon_ring_write(ring, 0);
3045         radeon_ring_write(ring, 0);
3046 }
3047
3048 /**
3049  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3050  *
3051  * @rdev: radeon_device pointer
3052  * @fence: radeon fence object
3053  *
3054  * Emits a fence sequnce number on the compute ring and flushes
3055  * GPU caches.
3056  */
3057 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3058                                  struct radeon_fence *fence)
3059 {
3060         struct radeon_ring *ring = &rdev->ring[fence->ring];
3061         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3062
3063         /* RELEASE_MEM - flush caches, send int */
3064         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3065         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3066                                  EOP_TC_ACTION_EN |
3067                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3068                                  EVENT_INDEX(5)));
3069         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3070         radeon_ring_write(ring, addr & 0xfffffffc);
3071         radeon_ring_write(ring, upper_32_bits(addr));
3072         radeon_ring_write(ring, fence->seq);
3073         radeon_ring_write(ring, 0);
3074         /* HDP flush */
3075         /* We should be using the new WAIT_REG_MEM special op packet here
3076          * but it causes the CP to hang
3077          */
3078         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3079         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3080                                  WRITE_DATA_DST_SEL(0)));
3081         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3082         radeon_ring_write(ring, 0);
3083         radeon_ring_write(ring, 0);
3084 }
3085
/**
 * cik_semaphore_ring_emit - emit a semaphore signal/wait on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the semaphore packet on
 * @semaphore: semaphore object holding the GPU address to operate on
 * @emit_wait: true to emit a wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet that either signals or waits on the
 * semaphore at semaphore->gpu_addr (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
                             struct radeon_ring *ring,
                             struct radeon_semaphore *semaphore,
                             bool emit_wait)
{
        uint64_t addr = semaphore->gpu_addr;
        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
        radeon_ring_write(ring, addr & 0xffffffff);
        /* upper 16 address bits are packed with the signal/wait select */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3098
3099 /*
3100  * IB stuff
3101  */
3102 /**
3103  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3104  *
3105  * @rdev: radeon_device pointer
3106  * @ib: radeon indirect buffer object
3107  *
3108  * Emits an DE (drawing engine) or CE (constant engine) IB
3109  * on the gfx ring.  IBs are usually generated by userspace
3110  * acceleration drivers and submitted to the kernel for
3111  * sheduling on the ring.  This function schedules the IB
3112  * on the gfx ring for execution by the GPU.
3113  */
3114 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3115 {
3116         struct radeon_ring *ring = &rdev->ring[ib->ring];
3117         u32 header, control = INDIRECT_BUFFER_VALID;
3118
3119         if (ib->is_const_ib) {
3120                 /* set switch buffer packet before const IB */
3121                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3122                 radeon_ring_write(ring, 0);
3123
3124                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3125         } else {
3126                 u32 next_rptr;
3127                 if (ring->rptr_save_reg) {
3128                         next_rptr = ring->wptr + 3 + 4;
3129                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3130                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3131                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3132                         radeon_ring_write(ring, next_rptr);
3133                 } else if (rdev->wb.enabled) {
3134                         next_rptr = ring->wptr + 5 + 4;
3135                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3136                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3137                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3138                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3139                         radeon_ring_write(ring, next_rptr);
3140                 }
3141
3142                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3143         }
3144
3145         control |= ib->length_dw |
3146                 (ib->vm ? (ib->vm->id << 24) : 0);
3147
3148         radeon_ring_write(ring, header);
3149         radeon_ring_write(ring,
3150 #ifdef __BIG_ENDIAN
3151                           (2 << 0) |
3152 #endif
3153                           (ib->gpu_addr & 0xFFFFFFFC));
3154         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3155         radeon_ring_write(ring, control);
3156 }
3157
3158 /**
3159  * cik_ib_test - basic gfx ring IB test
3160  *
3161  * @rdev: radeon_device pointer
3162  * @ring: radeon_ring structure holding ring information
3163  *
3164  * Allocate an IB and execute it on the gfx ring (CIK).
3165  * Provides a basic gfx ring test to verify that IBs are working.
3166  * Returns 0 on success, error on failure.
3167  */
3168 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3169 {
3170         struct radeon_ib ib;
3171         uint32_t scratch;
3172         uint32_t tmp = 0;
3173         unsigned i;
3174         int r;
3175
3176         r = radeon_scratch_get(rdev, &scratch);
3177         if (r) {
3178                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3179                 return r;
3180         }
3181         WREG32(scratch, 0xCAFEDEAD);
3182         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3183         if (r) {
3184                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3185                 return r;
3186         }
3187         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3188         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3189         ib.ptr[2] = 0xDEADBEEF;
3190         ib.length_dw = 3;
3191         r = radeon_ib_schedule(rdev, &ib, NULL);
3192         if (r) {
3193                 radeon_scratch_free(rdev, scratch);
3194                 radeon_ib_free(rdev, &ib);
3195                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3196                 return r;
3197         }
3198         r = radeon_fence_wait(ib.fence, false);
3199         if (r) {
3200                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3201                 return r;
3202         }
3203         for (i = 0; i < rdev->usec_timeout; i++) {
3204                 tmp = RREG32(scratch);
3205                 if (tmp == 0xDEADBEEF)
3206                         break;
3207                 DRM_UDELAY(1);
3208         }
3209         if (i < rdev->usec_timeout) {
3210                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3211         } else {
3212                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3213                           scratch, tmp);
3214                 r = -EINVAL;
3215         }
3216         radeon_scratch_free(rdev, scratch);
3217         radeon_ib_free(rdev, &ib);
3218         return r;
3219 }
3220
3221 /*
3222  * CP.
3223  * On CIK, gfx and compute now have independent command processors.
3224  *
3225  * GFX
3226  * Gfx consists of a single ring and can process both gfx jobs and
3227  * compute jobs.  The gfx CP consists of three microengines (ME):
3228  * PFP - Pre-Fetch Parser
3229  * ME - Micro Engine
3230  * CE - Constant Engine
3231  * The PFP and ME make up what is considered the Drawing Engine (DE).
3232  * The CE is an asynchronous engine used for updating buffer descriptors
3233  * used by the DE so that they can be loaded into cache in parallel
3234  * while the DE is processing state update packets.
3235  *
3236  * Compute
3237  * The compute CP consists of two microengines (ME):
3238  * MEC1 - Compute MicroEngine 1
3239  * MEC2 - Compute MicroEngine 2
3240  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3241  * The queues are exposed to userspace and are programmed directly
3242  * by the compute runtime.
3243  */
3244 /**
3245  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3246  *
3247  * @rdev: radeon_device pointer
3248  * @enable: enable or disable the MEs
3249  *
3250  * Halts or unhalts the gfx MEs.
3251  */
3252 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3253 {
3254         if (enable)
3255                 WREG32(CP_ME_CNTL, 0);
3256         else {
3257                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3258                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3259         }
3260         udelay(50);
3261 }
3262
3263 /**
3264  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3265  *
3266  * @rdev: radeon_device pointer
3267  *
3268  * Loads the gfx PFP, ME, and CE ucode.
3269  * Returns 0 for success, -EINVAL if the ucode is not available.
3270  */
3271 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3272 {
3273         const __be32 *fw_data;
3274         int i;
3275
3276         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3277                 return -EINVAL;
3278
3279         cik_cp_gfx_enable(rdev, false);
3280
3281         /* PFP */
3282         fw_data = (const __be32 *)rdev->pfp_fw->data;
3283         WREG32(CP_PFP_UCODE_ADDR, 0);
3284         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3285                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3286         WREG32(CP_PFP_UCODE_ADDR, 0);
3287
3288         /* CE */
3289         fw_data = (const __be32 *)rdev->ce_fw->data;
3290         WREG32(CP_CE_UCODE_ADDR, 0);
3291         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3292                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3293         WREG32(CP_CE_UCODE_ADDR, 0);
3294
3295         /* ME */
3296         fw_data = (const __be32 *)rdev->me_fw->data;
3297         WREG32(CP_ME_RAM_WADDR, 0);
3298         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3299                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3300         WREG32(CP_ME_RAM_WADDR, 0);
3301
3302         WREG32(CP_PFP_UCODE_ADDR, 0);
3303         WREG32(CP_CE_UCODE_ADDR, 0);
3304         WREG32(CP_ME_RAM_WADDR, 0);
3305         WREG32(CP_ME_RAM_RADDR, 0);
3306         return 0;
3307 }
3308
3309 /**
3310  * cik_cp_gfx_start - start the gfx ring
3311  *
3312  * @rdev: radeon_device pointer
3313  *
3314  * Enables the ring and loads the clear state context and other
3315  * packets required to init the ring.
3316  * Returns 0 for success, error for failure.
3317  */
3318 static int cik_cp_gfx_start(struct radeon_device *rdev)
3319 {
3320         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3321         int r, i;
3322
3323         /* init the CP */
3324         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3325         WREG32(CP_ENDIAN_SWAP, 0);
3326         WREG32(CP_DEVICE_ID, 1);
3327
3328         cik_cp_gfx_enable(rdev, true);
3329
3330         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3331         if (r) {
3332                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3333                 return r;
3334         }
3335
3336         /* init the CE partitions.  CE only used for gfx on CIK */
3337         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3338         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3339         radeon_ring_write(ring, 0xc000);
3340         radeon_ring_write(ring, 0xc000);
3341
3342         /* setup clear context state */
3343         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3344         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3345
3346         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3347         radeon_ring_write(ring, 0x80000000);
3348         radeon_ring_write(ring, 0x80000000);
3349
3350         for (i = 0; i < cik_default_size; i++)
3351                 radeon_ring_write(ring, cik_default_state[i]);
3352
3353         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3354         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3355
3356         /* set clear context state */
3357         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3358         radeon_ring_write(ring, 0);
3359
3360         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3361         radeon_ring_write(ring, 0x00000316);
3362         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3363         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3364
3365         radeon_ring_unlock_commit(rdev, ring);
3366
3367         return 0;
3368 }
3369
3370 /**
3371  * cik_cp_gfx_fini - stop the gfx ring
3372  *
3373  * @rdev: radeon_device pointer
3374  *
3375  * Stop the gfx ring and tear down the driver ring
3376  * info.
3377  */
3378 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3379 {
3380         cik_cp_gfx_enable(rdev, false);
3381         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3382 }
3383
3384 /**
3385  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3386  *
3387  * @rdev: radeon_device pointer
3388  *
3389  * Program the location and size of the gfx ring buffer
3390  * and test it to make sure it's working.
3391  * Returns 0 for success, error for failure.
3392  */
3393 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3394 {
3395         struct radeon_ring *ring;
3396         u32 tmp;
3397         u32 rb_bufsz;
3398         u64 rb_addr;
3399         int r;
3400
3401         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3402         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3403
3404         /* Set the write pointer delay */
3405         WREG32(CP_RB_WPTR_DELAY, 0);
3406
3407         /* set the RB to use vmid 0 */
3408         WREG32(CP_RB_VMID, 0);
3409
3410         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3411
3412         /* ring 0 - compute and gfx */
3413         /* Set ring buffer size */
3414         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3415         rb_bufsz = order_base_2(ring->ring_size / 8);
3416         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3417 #ifdef __BIG_ENDIAN
3418         tmp |= BUF_SWAP_32BIT;
3419 #endif
3420         WREG32(CP_RB0_CNTL, tmp);
3421
3422         /* Initialize the ring buffer's read and write pointers */
3423         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3424         ring->wptr = 0;
3425         WREG32(CP_RB0_WPTR, ring->wptr);
3426
3427         /* set the wb address wether it's enabled or not */
3428         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3429         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3430
3431         /* scratch register shadowing is no longer supported */
3432         WREG32(SCRATCH_UMSK, 0);
3433
3434         if (!rdev->wb.enabled)
3435                 tmp |= RB_NO_UPDATE;
3436
3437         mdelay(1);
3438         WREG32(CP_RB0_CNTL, tmp);
3439
3440         rb_addr = ring->gpu_addr >> 8;
3441         WREG32(CP_RB0_BASE, rb_addr);
3442         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3443
3444         ring->rptr = RREG32(CP_RB0_RPTR);
3445
3446         /* start the ring */
3447         cik_cp_gfx_start(rdev);
3448         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3449         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3450         if (r) {
3451                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3452                 return r;
3453         }
3454         return 0;
3455 }
3456
3457 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3458                               struct radeon_ring *ring)
3459 {
3460         u32 rptr;
3461
3462
3463
3464         if (rdev->wb.enabled) {
3465                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3466         } else {
3467                 mutex_lock(&rdev->srbm_mutex);
3468                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3469                 rptr = RREG32(CP_HQD_PQ_RPTR);
3470                 cik_srbm_select(rdev, 0, 0, 0, 0);
3471                 mutex_unlock(&rdev->srbm_mutex);
3472         }
3473
3474         return rptr;
3475 }
3476
3477 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3478                               struct radeon_ring *ring)
3479 {
3480         u32 wptr;
3481
3482         if (rdev->wb.enabled) {
3483                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3484         } else {
3485                 mutex_lock(&rdev->srbm_mutex);
3486                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3487                 wptr = RREG32(CP_HQD_PQ_WPTR);
3488                 cik_srbm_select(rdev, 0, 0, 0, 0);
3489                 mutex_unlock(&rdev->srbm_mutex);
3490         }
3491
3492         return wptr;
3493 }
3494
/*
 * Publish a new write pointer for a compute ring: update the writeback
 * shadow first, then ring the queue's doorbell so the CP picks it up.
 */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
                               struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3501
3502 /**
3503  * cik_cp_compute_enable - enable/disable the compute CP MEs
3504  *
3505  * @rdev: radeon_device pointer
3506  * @enable: enable or disable the MEs
3507  *
3508  * Halts or unhalts the compute MEs.
3509  */
3510 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3511 {
3512         if (enable)
3513                 WREG32(CP_MEC_CNTL, 0);
3514         else
3515                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3516         udelay(50);
3517 }
3518
3519 /**
3520  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3521  *
3522  * @rdev: radeon_device pointer
3523  *
3524  * Loads the compute MEC1&2 ucode.
3525  * Returns 0 for success, -EINVAL if the ucode is not available.
3526  */
3527 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3528 {
3529         const __be32 *fw_data;
3530         int i;
3531
3532         if (!rdev->mec_fw)
3533                 return -EINVAL;
3534
3535         cik_cp_compute_enable(rdev, false);
3536
3537         /* MEC1 */
3538         fw_data = (const __be32 *)rdev->mec_fw->data;
3539         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3540         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3541                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3542         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3543
3544         if (rdev->family == CHIP_KAVERI) {
3545                 /* MEC2 */
3546                 fw_data = (const __be32 *)rdev->mec_fw->data;
3547                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3548                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3549                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3550                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3551         }
3552
3553         return 0;
3554 }
3555
3556 /**
3557  * cik_cp_compute_start - start the compute queues
3558  *
3559  * @rdev: radeon_device pointer
3560  *
3561  * Enable the compute queues.
3562  * Returns 0 for success, error for failure.
3563  */
3564 static int cik_cp_compute_start(struct radeon_device *rdev)
3565 {
3566         cik_cp_compute_enable(rdev, true);
3567
3568         return 0;
3569 }
3570
3571 /**
3572  * cik_cp_compute_fini - stop the compute queues
3573  *
3574  * @rdev: radeon_device pointer
3575  *
3576  * Stop the compute queues and tear down the driver queue
3577  * info.
3578  */
3579 static void cik_cp_compute_fini(struct radeon_device *rdev)
3580 {
3581         int i, idx, r;
3582
3583         cik_cp_compute_enable(rdev, false);
3584
3585         for (i = 0; i < 2; i++) {
3586                 if (i == 0)
3587                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3588                 else
3589                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3590
3591                 if (rdev->ring[idx].mqd_obj) {
3592                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3593                         if (unlikely(r != 0))
3594                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3595
3596                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3597                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3598
3599                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3600                         rdev->ring[idx].mqd_obj = NULL;
3601                 }
3602         }
3603 }
3604
3605 static void cik_mec_fini(struct radeon_device *rdev)
3606 {
3607         int r;
3608
3609         if (rdev->mec.hpd_eop_obj) {
3610                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3611                 if (unlikely(r != 0))
3612                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3613                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3614                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3615
3616                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3617                 rdev->mec.hpd_eop_obj = NULL;
3618         }
3619 }
3620
3621 #define MEC_HPD_SIZE 2048
3622
3623 static int cik_mec_init(struct radeon_device *rdev)
3624 {
3625         int r;
3626         u32 *hpd;
3627
3628         /*
3629          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3630          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3631          */
3632         if (rdev->family == CHIP_KAVERI)
3633                 rdev->mec.num_mec = 2;
3634         else
3635                 rdev->mec.num_mec = 1;
3636         rdev->mec.num_pipe = 4;
3637         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3638
3639         if (rdev->mec.hpd_eop_obj == NULL) {
3640                 r = radeon_bo_create(rdev,
3641                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3642                                      PAGE_SIZE, true,
3643                                      RADEON_GEM_DOMAIN_GTT, NULL,
3644                                      &rdev->mec.hpd_eop_obj);
3645                 if (r) {
3646                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3647                         return r;
3648                 }
3649         }
3650
3651         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3652         if (unlikely(r != 0)) {
3653                 cik_mec_fini(rdev);
3654                 return r;
3655         }
3656         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3657                           &rdev->mec.hpd_eop_gpu_addr);
3658         if (r) {
3659                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3660                 cik_mec_fini(rdev);
3661                 return r;
3662         }
3663         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3664         if (r) {
3665                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3666                 cik_mec_fini(rdev);
3667                 return r;
3668         }
3669
3670         /* clear memory.  Not sure if this is required or not */
3671         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3672
3673         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3674         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3675
3676         return 0;
3677 }
3678
/*
 * Shadow copy of the per-queue HQD register block as it is written into
 * the MQD (see struct bonaire_mqd below).  The field order mirrors the
 * hardware layout, so do not reorder or insert members.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3717
/*
 * Memory Queue Descriptor for Bonaire-class (CIK) compute queues.  One
 * instance lives in a GTT buffer object per compute ring and is consumed
 * by the MEC firmware; the layout is defined by hardware, so do not
 * reorder or resize members.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* register state the firmware restores when the queue is mapped */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3745
3746 /**
3747  * cik_cp_compute_resume - setup the compute queue registers
3748  *
3749  * @rdev: radeon_device pointer
3750  *
3751  * Program the compute queues and test them to make sure they
3752  * are working.
3753  * Returns 0 for success, error for failure.
3754  */
3755 static int cik_cp_compute_resume(struct radeon_device *rdev)
3756 {
3757         int r, i, idx;
3758         u32 tmp;
3759         bool use_doorbell = true;
3760         u64 hqd_gpu_addr;
3761         u64 mqd_gpu_addr;
3762         u64 eop_gpu_addr;
3763         u64 wb_gpu_addr;
3764         u32 *buf;
3765         struct bonaire_mqd *mqd;
3766
3767         r = cik_cp_compute_start(rdev);
3768         if (r)
3769                 return r;
3770
3771         /* fix up chicken bits */
3772         tmp = RREG32(CP_CPF_DEBUG);
3773         tmp |= (1 << 23);
3774         WREG32(CP_CPF_DEBUG, tmp);
3775
3776         /* init the pipes */
3777         mutex_lock(&rdev->srbm_mutex);
3778         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3779                 int me = (i < 4) ? 1 : 2;
3780                 int pipe = (i < 4) ? i : (i - 4);
3781
3782                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3783
3784                 cik_srbm_select(rdev, me, pipe, 0, 0);
3785
3786                 /* write the EOP addr */
3787                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3788                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3789
3790                 /* set the VMID assigned */
3791                 WREG32(CP_HPD_EOP_VMID, 0);
3792
3793                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3794                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3795                 tmp &= ~EOP_SIZE_MASK;
3796                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3797                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3798         }
3799         cik_srbm_select(rdev, 0, 0, 0, 0);
3800         mutex_unlock(&rdev->srbm_mutex);
3801
3802         /* init the queues.  Just two for now. */
3803         for (i = 0; i < 2; i++) {
3804                 if (i == 0)
3805                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3806                 else
3807                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3808
3809                 if (rdev->ring[idx].mqd_obj == NULL) {
3810                         r = radeon_bo_create(rdev,
3811                                              sizeof(struct bonaire_mqd),
3812                                              PAGE_SIZE, true,
3813                                              RADEON_GEM_DOMAIN_GTT, NULL,
3814                                              &rdev->ring[idx].mqd_obj);
3815                         if (r) {
3816                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3817                                 return r;
3818                         }
3819                 }
3820
3821                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3822                 if (unlikely(r != 0)) {
3823                         cik_cp_compute_fini(rdev);
3824                         return r;
3825                 }
3826                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3827                                   &mqd_gpu_addr);
3828                 if (r) {
3829                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3830                         cik_cp_compute_fini(rdev);
3831                         return r;
3832                 }
3833                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3834                 if (r) {
3835                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3836                         cik_cp_compute_fini(rdev);
3837                         return r;
3838                 }
3839
3840                 /* doorbell offset */
3841                 rdev->ring[idx].doorbell_offset =
3842                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3843
3844                 /* init the mqd struct */
3845                 memset(buf, 0, sizeof(struct bonaire_mqd));
3846
3847                 mqd = (struct bonaire_mqd *)buf;
3848                 mqd->header = 0xC0310800;
3849                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3850                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3851                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3852                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3853
3854                 mutex_lock(&rdev->srbm_mutex);
3855                 cik_srbm_select(rdev, rdev->ring[idx].me,
3856                                 rdev->ring[idx].pipe,
3857                                 rdev->ring[idx].queue, 0);
3858
3859                 /* disable wptr polling */
3860                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3861                 tmp &= ~WPTR_POLL_EN;
3862                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3863
3864                 /* enable doorbell? */
3865                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3866                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3867                 if (use_doorbell)
3868                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3869                 else
3870                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3871                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3872                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3873
3874                 /* disable the queue if it's active */
3875                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3876                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3877                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3878                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3879                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3880                         for (i = 0; i < rdev->usec_timeout; i++) {
3881                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3882                                         break;
3883                                 udelay(1);
3884                         }
3885                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3886                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3887                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3888                 }
3889
3890                 /* set the pointer to the MQD */
3891                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3892                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3893                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3894                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3895                 /* set MQD vmid to 0 */
3896                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3897                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3898                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3899
3900                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3901                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3902                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3903                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3904                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3905                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3906
3907                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3908                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3909                 mqd->queue_state.cp_hqd_pq_control &=
3910                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3911
3912                 mqd->queue_state.cp_hqd_pq_control |=
3913                         order_base_2(rdev->ring[idx].ring_size / 8);
3914                 mqd->queue_state.cp_hqd_pq_control |=
3915                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3916 #ifdef __BIG_ENDIAN
3917                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3918 #endif
3919                 mqd->queue_state.cp_hqd_pq_control &=
3920                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3921                 mqd->queue_state.cp_hqd_pq_control |=
3922                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3923                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3924
3925                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3926                 if (i == 0)
3927                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3928                 else
3929                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3930                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3931                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3932                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3933                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3934                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3935
3936                 /* set the wb address wether it's enabled or not */
3937                 if (i == 0)
3938                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3939                 else
3940                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3941                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3942                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3943                         upper_32_bits(wb_gpu_addr) & 0xffff;
3944                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3945                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3946                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3947                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3948
3949                 /* enable the doorbell if requested */
3950                 if (use_doorbell) {
3951                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3952                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3953                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3954                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3955                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3956                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3957                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3958                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3959
3960                 } else {
3961                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3962                 }
3963                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3964                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3965
3966                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3967                 rdev->ring[idx].wptr = 0;
3968                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3969                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3970                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3971                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3972
3973                 /* set the vmid for the queue */
3974                 mqd->queue_state.cp_hqd_vmid = 0;
3975                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3976
3977                 /* activate the queue */
3978                 mqd->queue_state.cp_hqd_active = 1;
3979                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3980
3981                 cik_srbm_select(rdev, 0, 0, 0, 0);
3982                 mutex_unlock(&rdev->srbm_mutex);
3983
3984                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3985                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3986
3987                 rdev->ring[idx].ready = true;
3988                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3989                 if (r)
3990                         rdev->ring[idx].ready = false;
3991         }
3992
3993         return 0;
3994 }
3995
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4001
/*
 * Load the gfx CP ucode, then the compute CP ucode.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4015
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4021
/*
 * Bring up both command processors: load ucode, then resume the gfx
 * ring and the compute queues, with GUI idle interrupts masked for the
 * duration.  Returns 0 on success or the first error encountered.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4043
/* Dump the main GRBM/SRBM/SDMA/CP status registers for debugging hangs. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4083
4084 /**
4085  * cik_gpu_check_soft_reset - check which blocks are busy
4086  *
4087  * @rdev: radeon_device pointer
4088  *
4089  * Check which blocks are busy and return the relevant reset
4090  * mask to be used by cik_gpu_soft_reset().
4091  * Returns a mask of the blocks to be reset.
4092  */
4093 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4094 {
4095         u32 reset_mask = 0;
4096         u32 tmp;
4097
4098         /* GRBM_STATUS */
4099         tmp = RREG32(GRBM_STATUS);
4100         if (tmp & (PA_BUSY | SC_BUSY |
4101                    BCI_BUSY | SX_BUSY |
4102                    TA_BUSY | VGT_BUSY |
4103                    DB_BUSY | CB_BUSY |
4104                    GDS_BUSY | SPI_BUSY |
4105                    IA_BUSY | IA_BUSY_NO_DMA))
4106                 reset_mask |= RADEON_RESET_GFX;
4107
4108         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4109                 reset_mask |= RADEON_RESET_CP;
4110
4111         /* GRBM_STATUS2 */
4112         tmp = RREG32(GRBM_STATUS2);
4113         if (tmp & RLC_BUSY)
4114                 reset_mask |= RADEON_RESET_RLC;
4115
4116         /* SDMA0_STATUS_REG */
4117         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4118         if (!(tmp & SDMA_IDLE))
4119                 reset_mask |= RADEON_RESET_DMA;
4120
4121         /* SDMA1_STATUS_REG */
4122         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4123         if (!(tmp & SDMA_IDLE))
4124                 reset_mask |= RADEON_RESET_DMA1;
4125
4126         /* SRBM_STATUS2 */
4127         tmp = RREG32(SRBM_STATUS2);
4128         if (tmp & SDMA_BUSY)
4129                 reset_mask |= RADEON_RESET_DMA;
4130
4131         if (tmp & SDMA1_BUSY)
4132                 reset_mask |= RADEON_RESET_DMA1;
4133
4134         /* SRBM_STATUS */
4135         tmp = RREG32(SRBM_STATUS);
4136
4137         if (tmp & IH_BUSY)
4138                 reset_mask |= RADEON_RESET_IH;
4139
4140         if (tmp & SEM_BUSY)
4141                 reset_mask |= RADEON_RESET_SEM;
4142
4143         if (tmp & GRBM_RQ_PENDING)
4144                 reset_mask |= RADEON_RESET_GRBM;
4145
4146         if (tmp & VMC_BUSY)
4147                 reset_mask |= RADEON_RESET_VMC;
4148
4149         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4150                    MCC_BUSY | MCD_BUSY))
4151                 reset_mask |= RADEON_RESET_MC;
4152
4153         if (evergreen_is_display_hung(rdev))
4154                 reset_mask |= RADEON_RESET_DISPLAY;
4155
4156         /* Skip MC reset as it's mostly likely not hung, just busy */
4157         if (reset_mask & RADEON_RESET_MC) {
4158                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4159                 reset_mask &= ~RADEON_RESET_MC;
4160         }
4161
4162         return reset_mask;
4163 }
4164
4165 /**
4166  * cik_gpu_soft_reset - soft reset GPU
4167  *
4168  * @rdev: radeon_device pointer
4169  * @reset_mask: mask of which blocks to reset
4170  *
4171  * Soft reset the blocks specified in @reset_mask.
4172  */
4173 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4174 {
4175         struct evergreen_mc_save save;
4176         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4177         u32 tmp;
4178
4179         if (reset_mask == 0)
4180                 return;
4181
4182         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4183
4184         cik_print_gpu_status_regs(rdev);
4185         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4186                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4187         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4188                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4189
4190         /* disable CG/PG */
4191         cik_fini_pg(rdev);
4192         cik_fini_cg(rdev);
4193
4194         /* stop the rlc */
4195         cik_rlc_stop(rdev);
4196
4197         /* Disable GFX parsing/prefetching */
4198         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4199
4200         /* Disable MEC parsing/prefetching */
4201         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4202
4203         if (reset_mask & RADEON_RESET_DMA) {
4204                 /* sdma0 */
4205                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4206                 tmp |= SDMA_HALT;
4207                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4208         }
4209         if (reset_mask & RADEON_RESET_DMA1) {
4210                 /* sdma1 */
4211                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4212                 tmp |= SDMA_HALT;
4213                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4214         }
4215
4216         evergreen_mc_stop(rdev, &save);
4217         if (evergreen_mc_wait_for_idle(rdev)) {
4218                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4219         }
4220
4221         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4222                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4223
4224         if (reset_mask & RADEON_RESET_CP) {
4225                 grbm_soft_reset |= SOFT_RESET_CP;
4226
4227                 srbm_soft_reset |= SOFT_RESET_GRBM;
4228         }
4229
4230         if (reset_mask & RADEON_RESET_DMA)
4231                 srbm_soft_reset |= SOFT_RESET_SDMA;
4232
4233         if (reset_mask & RADEON_RESET_DMA1)
4234                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4235
4236         if (reset_mask & RADEON_RESET_DISPLAY)
4237                 srbm_soft_reset |= SOFT_RESET_DC;
4238
4239         if (reset_mask & RADEON_RESET_RLC)
4240                 grbm_soft_reset |= SOFT_RESET_RLC;
4241
4242         if (reset_mask & RADEON_RESET_SEM)
4243                 srbm_soft_reset |= SOFT_RESET_SEM;
4244
4245         if (reset_mask & RADEON_RESET_IH)
4246                 srbm_soft_reset |= SOFT_RESET_IH;
4247
4248         if (reset_mask & RADEON_RESET_GRBM)
4249                 srbm_soft_reset |= SOFT_RESET_GRBM;
4250
4251         if (reset_mask & RADEON_RESET_VMC)
4252                 srbm_soft_reset |= SOFT_RESET_VMC;
4253
4254         if (!(rdev->flags & RADEON_IS_IGP)) {
4255                 if (reset_mask & RADEON_RESET_MC)
4256                         srbm_soft_reset |= SOFT_RESET_MC;
4257         }
4258
4259         if (grbm_soft_reset) {
4260                 tmp = RREG32(GRBM_SOFT_RESET);
4261                 tmp |= grbm_soft_reset;
4262                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4263                 WREG32(GRBM_SOFT_RESET, tmp);
4264                 tmp = RREG32(GRBM_SOFT_RESET);
4265
4266                 udelay(50);
4267
4268                 tmp &= ~grbm_soft_reset;
4269                 WREG32(GRBM_SOFT_RESET, tmp);
4270                 tmp = RREG32(GRBM_SOFT_RESET);
4271         }
4272
4273         if (srbm_soft_reset) {
4274                 tmp = RREG32(SRBM_SOFT_RESET);
4275                 tmp |= srbm_soft_reset;
4276                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4277                 WREG32(SRBM_SOFT_RESET, tmp);
4278                 tmp = RREG32(SRBM_SOFT_RESET);
4279
4280                 udelay(50);
4281
4282                 tmp &= ~srbm_soft_reset;
4283                 WREG32(SRBM_SOFT_RESET, tmp);
4284                 tmp = RREG32(SRBM_SOFT_RESET);
4285         }
4286
4287         /* Wait a little for things to settle down */
4288         udelay(50);
4289
4290         evergreen_mc_resume(rdev, &save);
4291         udelay(50);
4292
4293         cik_print_gpu_status_regs(rdev);
4294 }
4295
4296 /**
4297  * cik_asic_reset - soft reset GPU
4298  *
4299  * @rdev: radeon_device pointer
4300  *
4301  * Look up which blocks are hung and attempt
4302  * to reset them.
4303  * Returns 0 for success.
4304  */
4305 int cik_asic_reset(struct radeon_device *rdev)
4306 {
4307         u32 reset_mask;
4308
4309         reset_mask = cik_gpu_check_soft_reset(rdev);
4310
4311         if (reset_mask)
4312                 r600_set_bios_scratch_engine_hung(rdev, true);
4313
4314         cik_gpu_soft_reset(rdev, reset_mask);
4315
4316         reset_mask = cik_gpu_check_soft_reset(rdev);
4317
4318         if (!reset_mask)
4319                 r600_set_bios_scratch_engine_hung(rdev, false);
4320
4321         return 0;
4322 }
4323
4324 /**
4325  * cik_gfx_is_lockup - check if the 3D engine is locked up
4326  *
4327  * @rdev: radeon_device pointer
4328  * @ring: radeon_ring structure holding ring information
4329  *
4330  * Check if the 3D engine is locked up (CIK).
4331  * Returns true if the engine is locked, false if not.
4332  */
4333 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4334 {
4335         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4336
4337         if (!(reset_mask & (RADEON_RESET_GFX |
4338                             RADEON_RESET_COMPUTE |
4339                             RADEON_RESET_CP))) {
4340                 radeon_ring_lockup_update(ring);
4341                 return false;
4342         }
4343         /* force CP activities */
4344         radeon_ring_force_activity(rdev, ring);
4345         return radeon_ring_test_lockup(rdev, ring);
4346 }
4347
4348 /* MC */
4349 /**
4350  * cik_mc_program - program the GPU memory controller
4351  *
4352  * @rdev: radeon_device pointer
4353  *
4354  * Set the location of vram, gart, and AGP in the GPU's
4355  * physical address space (CIK).
4356  */
4357 static void cik_mc_program(struct radeon_device *rdev)
4358 {
4359         struct evergreen_mc_save save;
4360         u32 tmp;
4361         int i, j;
4362
4363         /* Initialize HDP */
4364         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4365                 WREG32((0x2c14 + j), 0x00000000);
4366                 WREG32((0x2c18 + j), 0x00000000);
4367                 WREG32((0x2c1c + j), 0x00000000);
4368                 WREG32((0x2c20 + j), 0x00000000);
4369                 WREG32((0x2c24 + j), 0x00000000);
4370         }
4371         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4372
4373         evergreen_mc_stop(rdev, &save);
4374         if (radeon_mc_wait_for_idle(rdev)) {
4375                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4376         }
4377         /* Lockout access through VGA aperture*/
4378         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4379         /* Update configuration */
4380         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4381                rdev->mc.vram_start >> 12);
4382         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4383                rdev->mc.vram_end >> 12);
4384         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4385                rdev->vram_scratch.gpu_addr >> 12);
4386         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4387         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4388         WREG32(MC_VM_FB_LOCATION, tmp);
4389         /* XXX double check these! */
4390         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4391         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4392         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4393         WREG32(MC_VM_AGP_BASE, 0);
4394         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4395         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4396         if (radeon_mc_wait_for_idle(rdev)) {
4397                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4398         }
4399         evergreen_mc_resume(rdev, &save);
4400         /* we need to own VRAM, so turn off the VGA renderer here
4401          * to stop it overwriting our objects */
4402         rv515_vga_render_disable(rdev);
4403 }
4404
4405 /**
4406  * cik_mc_init - initialize the memory controller driver params
4407  *
4408  * @rdev: radeon_device pointer
4409  *
4410  * Look up the amount of vram, vram width, and decide how to place
4411  * vram and gart within the GPU's physical address space (CIK).
4412  * Returns 0 for success.
4413  */
4414 static int cik_mc_init(struct radeon_device *rdev)
4415 {
4416         u32 tmp;
4417         int chansize, numchan;
4418
4419         /* Get VRAM informations */
4420         rdev->mc.vram_is_ddr = true;
4421         tmp = RREG32(MC_ARB_RAMCFG);
4422         if (tmp & CHANSIZE_MASK) {
4423                 chansize = 64;
4424         } else {
4425                 chansize = 32;
4426         }
4427         tmp = RREG32(MC_SHARED_CHMAP);
4428         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4429         case 0:
4430         default:
4431                 numchan = 1;
4432                 break;
4433         case 1:
4434                 numchan = 2;
4435                 break;
4436         case 2:
4437                 numchan = 4;
4438                 break;
4439         case 3:
4440                 numchan = 8;
4441                 break;
4442         case 4:
4443                 numchan = 3;
4444                 break;
4445         case 5:
4446                 numchan = 6;
4447                 break;
4448         case 6:
4449                 numchan = 10;
4450                 break;
4451         case 7:
4452                 numchan = 12;
4453                 break;
4454         case 8:
4455                 numchan = 16;
4456                 break;
4457         }
4458         rdev->mc.vram_width = numchan * chansize;
4459         /* Could aper size report 0 ? */
4460         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4461         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4462         /* size in MB on si */
4463         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4464         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4465         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4466         si_vram_gtt_location(rdev, &rdev->mc);
4467         radeon_update_bandwidth_info(rdev);
4468
4469         return 0;
4470 }
4471
4472 /*
4473  * GART
4474  * VMID 0 is the physical GPU addresses as used by the kernel.
4475  * VMIDs 1-15 are used for userspace clients and are handled
4476  * by the radeon vm/hsa code.
4477  */
4478 /**
4479  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4480  *
4481  * @rdev: radeon_device pointer
4482  *
4483  * Flush the TLB for the VMID 0 page table (CIK).
4484  */
4485 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4486 {
4487         /* flush hdp cache */
4488         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4489
4490         /* bits 0-15 are the VM contexts0-15 */
4491         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4492 }
4493
4494 /**
4495  * cik_pcie_gart_enable - gart enable
4496  *
4497  * @rdev: radeon_device pointer
4498  *
4499  * This sets up the TLBs, programs the page tables for VMID0,
4500  * sets up the hw for VMIDs 1-15 which are allocated on
4501  * demand, and sets up the global locations for the LDS, GDS,
4502  * and GPUVM for FSA64 clients (CIK).
4503  * Returns 0 for success, errors for failure.
4504  */
4505 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4506 {
4507         int r, i;
4508
4509         if (rdev->gart.robj == NULL) {
4510                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4511                 return -EINVAL;
4512         }
4513         r = radeon_gart_table_vram_pin(rdev);
4514         if (r)
4515                 return r;
4516         radeon_gart_restore(rdev);
4517         /* Setup TLB control */
4518         WREG32(MC_VM_MX_L1_TLB_CNTL,
4519                (0xA << 7) |
4520                ENABLE_L1_TLB |
4521                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4522                ENABLE_ADVANCED_DRIVER_MODEL |
4523                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4524         /* Setup L2 cache */
4525         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4526                ENABLE_L2_FRAGMENT_PROCESSING |
4527                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4528                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4529                EFFECTIVE_L2_QUEUE_SIZE(7) |
4530                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4531         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4532         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4533                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4534         /* setup context0 */
4535         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4536         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4537         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4538         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4539                         (u32)(rdev->dummy_page.addr >> 12));
4540         WREG32(VM_CONTEXT0_CNTL2, 0);
4541         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4542                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4543
4544         WREG32(0x15D4, 0);
4545         WREG32(0x15D8, 0);
4546         WREG32(0x15DC, 0);
4547
4548         /* empty context1-15 */
4549         /* FIXME start with 4G, once using 2 level pt switch to full
4550          * vm size space
4551          */
4552         /* set vm size, must be a multiple of 4 */
4553         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4554         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4555         for (i = 1; i < 16; i++) {
4556                 if (i < 8)
4557                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4558                                rdev->gart.table_addr >> 12);
4559                 else
4560                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4561                                rdev->gart.table_addr >> 12);
4562         }
4563
4564         /* enable context1-15 */
4565         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4566                (u32)(rdev->dummy_page.addr >> 12));
4567         WREG32(VM_CONTEXT1_CNTL2, 4);
4568         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4569                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4570                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4571                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4572                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4573                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4575                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4577                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4578                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4579                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4580                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4581
4582         /* TC cache setup ??? */
4583         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4584         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4585         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4586
4587         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4588         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4589         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4590         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4591         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4592
4593         WREG32(TC_CFG_L1_VOLATILE, 0);
4594         WREG32(TC_CFG_L2_VOLATILE, 0);
4595
4596         if (rdev->family == CHIP_KAVERI) {
4597                 u32 tmp = RREG32(CHUB_CONTROL);
4598                 tmp &= ~BYPASS_VM;
4599                 WREG32(CHUB_CONTROL, tmp);
4600         }
4601
4602         /* XXX SH_MEM regs */
4603         /* where to put LDS, scratch, GPUVM in FSA64 space */
4604         mutex_lock(&rdev->srbm_mutex);
4605         for (i = 0; i < 16; i++) {
4606                 cik_srbm_select(rdev, 0, 0, 0, i);
4607                 /* CP and shaders */
4608                 WREG32(SH_MEM_CONFIG, 0);
4609                 WREG32(SH_MEM_APE1_BASE, 1);
4610                 WREG32(SH_MEM_APE1_LIMIT, 0);
4611                 WREG32(SH_MEM_BASES, 0);
4612                 /* SDMA GFX */
4613                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4614                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4615                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4616                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4617                 /* XXX SDMA RLC - todo */
4618         }
4619         cik_srbm_select(rdev, 0, 0, 0, 0);
4620         mutex_unlock(&rdev->srbm_mutex);
4621
4622         cik_pcie_gart_tlb_flush(rdev);
4623         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4624                  (unsigned)(rdev->mc.gtt_size >> 20),
4625                  (unsigned long long)rdev->gart.table_addr);
4626         rdev->gart.ready = true;
4627         return 0;
4628 }
4629
4630 /**
4631  * cik_pcie_gart_disable - gart disable
4632  *
4633  * @rdev: radeon_device pointer
4634  *
4635  * This disables all VM page table (CIK).
4636  */
4637 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4638 {
4639         /* Disable all tables */
4640         WREG32(VM_CONTEXT0_CNTL, 0);
4641         WREG32(VM_CONTEXT1_CNTL, 0);
4642         /* Setup TLB control */
4643         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4644                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4645         /* Setup L2 cache */
4646         WREG32(VM_L2_CNTL,
4647                ENABLE_L2_FRAGMENT_PROCESSING |
4648                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4649                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4650                EFFECTIVE_L2_QUEUE_SIZE(7) |
4651                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4652         WREG32(VM_L2_CNTL2, 0);
4653         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4654                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4655         radeon_gart_table_vram_unpin(rdev);
4656 }
4657
4658 /**
4659  * cik_pcie_gart_fini - vm fini callback
4660  *
4661  * @rdev: radeon_device pointer
4662  *
4663  * Tears down the driver GART/VM setup (CIK).
4664  */
4665 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4666 {
4667         cik_pcie_gart_disable(rdev);
4668         radeon_gart_table_vram_free(rdev);
4669         radeon_gart_fini(rdev);
4670 }
4671
4672 /* vm parser */
4673 /**
4674  * cik_ib_parse - vm ib_parse callback
4675  *
4676  * @rdev: radeon_device pointer
4677  * @ib: indirect buffer pointer
4678  *
4679  * CIK uses hw IB checking so this is a nop (CIK).
4680  */
4681 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4682 {
4683         return 0;
4684 }
4685
4686 /*
4687  * vm
4688  * VMID 0 is the physical GPU addresses as used by the kernel.
4689  * VMIDs 1-15 are used for userspace clients and are handled
4690  * by the radeon vm/hsa code.
4691  */
4692 /**
4693  * cik_vm_init - cik vm init callback
4694  *
4695  * @rdev: radeon_device pointer
4696  *
4697  * Inits cik specific vm parameters (number of VMs, base of vram for
4698  * VMIDs 1-15) (CIK).
4699  * Returns 0 for success.
4700  */
4701 int cik_vm_init(struct radeon_device *rdev)
4702 {
4703         /* number of VMs */
4704         rdev->vm_manager.nvm = 16;
4705         /* base offset of vram pages */
4706         if (rdev->flags & RADEON_IS_IGP) {
4707                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4708                 tmp <<= 22;
4709                 rdev->vm_manager.vram_base_offset = tmp;
4710         } else
4711                 rdev->vm_manager.vram_base_offset = 0;
4712
4713         return 0;
4714 }
4715
4716 /**
4717  * cik_vm_fini - cik vm fini callback
4718  *
4719  * @rdev: radeon_device pointer
4720  *
4721  * Tear down any asic specific VM setup (CIK).
4722  */
4723 void cik_vm_fini(struct radeon_device *rdev)
4724 {
4725 }
4726
4727 /**
4728  * cik_vm_decode_fault - print human readable fault info
4729  *
4730  * @rdev: radeon_device pointer
4731  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4732  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4733  *
4734  * Print human readable fault information (CIK).
4735  */
4736 static void cik_vm_decode_fault(struct radeon_device *rdev,
4737                                 u32 status, u32 addr, u32 mc_client)
4738 {
4739         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4740         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4741         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4742         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4743                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4744
4745         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4746                protections, vmid, addr,
4747                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4748                block, mc_client, mc_id);
4749 }
4750
4751 /**
4752  * cik_vm_flush - cik vm flush using the CP
4753  *
4754  * @rdev: radeon_device pointer
4755  *
4756  * Update the page table base and flush the VM TLB
4757  * using the CP (CIK).
4758  */
4759 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4760 {
4761         struct radeon_ring *ring = &rdev->ring[ridx];
4762
4763         if (vm == NULL)
4764                 return;
4765
4766         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4767         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4768                                  WRITE_DATA_DST_SEL(0)));
4769         if (vm->id < 8) {
4770                 radeon_ring_write(ring,
4771                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4772         } else {
4773                 radeon_ring_write(ring,
4774                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4775         }
4776         radeon_ring_write(ring, 0);
4777         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4778
4779         /* update SH_MEM_* regs */
4780         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4781         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4782                                  WRITE_DATA_DST_SEL(0)));
4783         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4784         radeon_ring_write(ring, 0);
4785         radeon_ring_write(ring, VMID(vm->id));
4786
4787         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4788         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4789                                  WRITE_DATA_DST_SEL(0)));
4790         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4791         radeon_ring_write(ring, 0);
4792
4793         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4794         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4795         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4796         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4797
4798         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4799         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4800                                  WRITE_DATA_DST_SEL(0)));
4801         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4802         radeon_ring_write(ring, 0);
4803         radeon_ring_write(ring, VMID(0));
4804
4805         /* HDP flush */
4806         /* We should be using the WAIT_REG_MEM packet here like in
4807          * cik_fence_ring_emit(), but it causes the CP to hang in this
4808          * context...
4809          */
4810         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4811         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4812                                  WRITE_DATA_DST_SEL(0)));
4813         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4814         radeon_ring_write(ring, 0);
4815         radeon_ring_write(ring, 0);
4816
4817         /* bits 0-15 are the VM contexts0-15 */
4818         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4819         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4820                                  WRITE_DATA_DST_SEL(0)));
4821         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4822         radeon_ring_write(ring, 0);
4823         radeon_ring_write(ring, 1 << vm->id);
4824
4825         /* compute doesn't have PFP */
4826         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4827                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4828                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4829                 radeon_ring_write(ring, 0x0);
4830         }
4831 }
4832
4833 /**
4834  * cik_vm_set_page - update the page tables using sDMA
4835  *
4836  * @rdev: radeon_device pointer
4837  * @ib: indirect buffer to fill with commands
4838  * @pe: addr of the page entry
4839  * @addr: dst addr to write into pe
4840  * @count: number of page entries to update
4841  * @incr: increase next addr by incr bytes
4842  * @flags: access flags
4843  *
4844  * Update the page tables using CP or sDMA (CIK).
4845  */
4846 void cik_vm_set_page(struct radeon_device *rdev,
4847                      struct radeon_ib *ib,
4848                      uint64_t pe,
4849                      uint64_t addr, unsigned count,
4850                      uint32_t incr, uint32_t flags)
4851 {
4852         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4853         uint64_t value;
4854         unsigned ndw;
4855
4856         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4857                 /* CP */
4858                 while (count) {
4859                         ndw = 2 + count * 2;
4860                         if (ndw > 0x3FFE)
4861                                 ndw = 0x3FFE;
4862
4863                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4864                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4865                                                     WRITE_DATA_DST_SEL(1));
4866                         ib->ptr[ib->length_dw++] = pe;
4867                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4868                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4869                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4870                                         value = radeon_vm_map_gart(rdev, addr);
4871                                         value &= 0xFFFFFFFFFFFFF000ULL;
4872                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4873                                         value = addr;
4874                                 } else {
4875                                         value = 0;
4876                                 }
4877                                 addr += incr;
4878                                 value |= r600_flags;
4879                                 ib->ptr[ib->length_dw++] = value;
4880                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4881                         }
4882                 }
4883         } else {
4884                 /* DMA */
4885                 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4886         }
4887 }
4888
4889 /*
4890  * RLC
4891  * The RLC is a multi-purpose microengine that handles a
4892  * variety of functions, the most important of which is
4893  * the interrupt controller.
4894  */
4895 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4896                                           bool enable)
4897 {
4898         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4899
4900         if (enable)
4901                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4902         else
4903                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4904         WREG32(CP_INT_CNTL_RING0, tmp);
4905 }
4906
4907 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4908 {
4909         u32 tmp;
4910
4911         tmp = RREG32(RLC_LB_CNTL);
4912         if (enable)
4913                 tmp |= LOAD_BALANCE_ENABLE;
4914         else
4915                 tmp &= ~LOAD_BALANCE_ENABLE;
4916         WREG32(RLC_LB_CNTL, tmp);
4917 }
4918
/* Poll the RLC serdes master busy registers for every SE/SH until they
 * report idle or rdev->usec_timeout microseconds elapse.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per-CU masters: check each shader engine / shader array in turn */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters to go idle */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4943
4944 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4945 {
4946         u32 tmp;
4947
4948         tmp = RREG32(RLC_CNTL);
4949         if (tmp != rlc)
4950                 WREG32(RLC_CNTL, rlc);
4951 }
4952
/* cik_halt_rlc - halt the RLC if it is currently enabled
 *
 * @rdev: radeon_device pointer
 *
 * Clears RLC_ENABLE, waits for the RLC GPM to go idle and for the
 * serdes to settle.  Returns the original RLC_CNTL value so the
 * caller can later restore it via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the GPM to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4976
/* cik_enter_rlc_safe_mode - ask the RLC to enter safe mode and wait for ack
 *
 * @rdev: radeon_device pointer
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE through RLC_GPR_REG2, then waits for
 * the GFX power/clock status bits to assert and for the REQ bit to
 * clear, each wait bounded by rdev->usec_timeout.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until both power and clock status report ready */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to acknowledge the request (REQ clears) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
4997
4998 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4999 {
5000         u32 tmp;
5001
5002         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5003         WREG32(RLC_GPR_REG2, tmp);
5004 }
5005
5006 /**
5007  * cik_rlc_stop - stop the RLC ME
5008  *
5009  * @rdev: radeon_device pointer
5010  *
5011  * Halt the RLC ME (MicroEngine) (CIK).
5012  */
5013 static void cik_rlc_stop(struct radeon_device *rdev)
5014 {
5015         WREG32(RLC_CNTL, 0);
5016
5017         cik_enable_gui_idle_interrupt(rdev, false);
5018
5019         cik_wait_for_rlc_serdes(rdev);
5020 }
5021
5022 /**
5023  * cik_rlc_start - start the RLC ME
5024  *
5025  * @rdev: radeon_device pointer
5026  *
5027  * Unhalt the RLC ME (MicroEngine) (CIK).
5028  */
5029 static void cik_rlc_start(struct radeon_device *rdev)
5030 {
5031         WREG32(RLC_CNTL, RLC_ENABLE);
5032
5033         cik_enable_gui_idle_interrupt(rdev, true);
5034
5035         udelay(50);
5036 }
5037
5038 /**
5039  * cik_rlc_resume - setup the RLC hw
5040  *
5041  * @rdev: radeon_device pointer
5042  *
5043  * Initialize the RLC registers, load the ucode,
5044  * and start the RLC (CIK).
5045  * Returns 0 for success, -EINVAL if the ucode is not available.
5046  */
5047 static int cik_rlc_resume(struct radeon_device *rdev)
5048 {
5049         u32 i, size, tmp;
5050         const __be32 *fw_data;
5051
5052         if (!rdev->rlc_fw)
5053                 return -EINVAL;
5054
5055         switch (rdev->family) {
5056         case CHIP_BONAIRE:
5057         default:
5058                 size = BONAIRE_RLC_UCODE_SIZE;
5059                 break;
5060         case CHIP_KAVERI:
5061                 size = KV_RLC_UCODE_SIZE;
5062                 break;
5063         case CHIP_KABINI:
5064                 size = KB_RLC_UCODE_SIZE;
5065                 break;
5066         }
5067
5068         cik_rlc_stop(rdev);
5069
5070         /* disable CG */
5071         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5072         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5073
5074         si_rlc_reset(rdev);
5075
5076         cik_init_pg(rdev);
5077
5078         cik_init_cg(rdev);
5079
5080         WREG32(RLC_LB_CNTR_INIT, 0);
5081         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5082
5083         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5084         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5085         WREG32(RLC_LB_PARAMS, 0x00600408);
5086         WREG32(RLC_LB_CNTL, 0x80000004);
5087
5088         WREG32(RLC_MC_CNTL, 0);
5089         WREG32(RLC_UCODE_CNTL, 0);
5090
5091         fw_data = (const __be32 *)rdev->rlc_fw->data;
5092                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5093         for (i = 0; i < size; i++)
5094                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5095         WREG32(RLC_GPM_UCODE_ADDR, 0);
5096
5097         /* XXX - find out what chips support lbpw */
5098         cik_enable_lbpw(rdev, false);
5099
5100         if (rdev->family == CHIP_BONAIRE)
5101                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5102
5103         cik_rlc_start(rdev);
5104
5105         return 0;
5106 }
5107
/* cik_enable_cgcg - enable/disable GFX coarse-grain clock gating (CGCG/CGLS)
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * On enable, the RLC is briefly halted while the serdes write masks
 * and control are programmed, then restored.  On disable, the GUI
 * idle interrupt is masked and CB_CGTT_SCLK_CTRL is read four times
 * with the results discarded — presumably a settle/flush workaround;
 * confirm before removing.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the saved RLC_CNTL state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads; values intentionally discarded */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5143
/* cik_enable_mgcg - enable/disable GFX medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Enable path: optionally turn on CP memory light sleep, clear the
 * MGCG override bit, program the serdes write masks around a halted
 * RLC, and configure CGTS SM control.  The disable path mirrors
 * this: set the override, switch off RLC/CP memory light sleep,
 * force the CGTS overrides, and program the serdes with
 * MGCG_OVERRIDE_1.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the saved RLC_CNTL state */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* switch off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* switch off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5222
/* MC/HUB/ATC/VM clock-gating control registers toggled by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5235
5236 static void cik_enable_mc_ls(struct radeon_device *rdev,
5237                              bool enable)
5238 {
5239         int i;
5240         u32 orig, data;
5241
5242         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5243                 orig = data = RREG32(mc_cg_registers[i]);
5244                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5245                         data |= MC_LS_ENABLE;
5246                 else
5247                         data &= ~MC_LS_ENABLE;
5248                 if (data != orig)
5249                         WREG32(mc_cg_registers[i], data);
5250         }
5251 }
5252
5253 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5254                                bool enable)
5255 {
5256         int i;
5257         u32 orig, data;
5258
5259         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5260                 orig = data = RREG32(mc_cg_registers[i]);
5261                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5262                         data |= MC_CG_ENABLE;
5263                 else
5264                         data &= ~MC_CG_ENABLE;
5265                 if (data != orig)
5266                         WREG32(mc_cg_registers[i], data);
5267         }
5268 }
5269
5270 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5271                                  bool enable)
5272 {
5273         u32 orig, data;
5274
5275         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5276                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5277                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5278         } else {
5279                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5280                 data |= 0xff000000;
5281                 if (data != orig)
5282                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5283
5284                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5285                 data |= 0xff000000;
5286                 if (data != orig)
5287                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5288         }
5289 }
5290
5291 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5292                                  bool enable)
5293 {
5294         u32 orig, data;
5295
5296         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5297                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5298                 data |= 0x100;
5299                 if (orig != data)
5300                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5301
5302                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5303                 data |= 0x100;
5304                 if (orig != data)
5305                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5306         } else {
5307                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5308                 data &= ~0x100;
5309                 if (orig != data)
5310                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5311
5312                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5313                 data &= ~0x100;
5314                 if (orig != data)
5315                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5316         }
5317 }
5318
/* cik_enable_uvd_mgcg - enable/disable UVD medium-grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * NOTE(review): in the enable path the RREG32_UVD_CTX() result is
 * immediately overwritten by "data = 0xfff", so the read looks dead.
 * It may be a required dummy read of the UVD context register —
 * confirm against the hw docs before changing.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
5344
5345 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5346                                bool enable)
5347 {
5348         u32 orig, data;
5349
5350         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5351
5352         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5353                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5354                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5355         else
5356                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5357                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5358
5359         if (orig != data)
5360                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5361 }
5362
5363 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5364                                 bool enable)
5365 {
5366         u32 orig, data;
5367
5368         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5369
5370         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5371                 data &= ~CLOCK_GATING_DIS;
5372         else
5373                 data |= CLOCK_GATING_DIS;
5374
5375         if (orig != data)
5376                 WREG32(HDP_HOST_PATH_CNTL, data);
5377 }
5378
5379 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5380                               bool enable)
5381 {
5382         u32 orig, data;
5383
5384         orig = data = RREG32(HDP_MEM_POWER_LS);
5385
5386         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5387                 data |= HDP_LS_ENABLE;
5388         else
5389                 data &= ~HDP_LS_ENABLE;
5390
5391         if (orig != data)
5392                 WREG32(HDP_MEM_POWER_LS, data);
5393 }
5394
/* cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable
 *
 * For GFX, the GUI idle interrupt is masked around the change and
 * MGCG/CGCG are toggled in a specific order (MGCG before CGCG on
 * enable, the reverse on disable).  MC gating is skipped on IGPs and
 * UVD gating is skipped when the chip has no UVD block.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating does not apply to IGPs (no dedicated MC) */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5438
/* cik_init_cg - enable clock gating at init time: GFX first, then the
 * remaining blocks (and UVD internal CG when the chip has UVD).
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5453
/* cik_fini_cg - disable clock gating in the reverse order of
 * cik_init_cg(): non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5464
5465 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5466                                           bool enable)
5467 {
5468         u32 data, orig;
5469
5470         orig = data = RREG32(RLC_PG_CNTL);
5471         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5472                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5473         else
5474                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5475         if (orig != data)
5476                 WREG32(RLC_PG_CNTL, data);
5477 }
5478
5479 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5480                                           bool enable)
5481 {
5482         u32 data, orig;
5483
5484         orig = data = RREG32(RLC_PG_CNTL);
5485         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5486                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5487         else
5488                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5489         if (orig != data)
5490                 WREG32(RLC_PG_CNTL, data);
5491 }
5492
5493 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5494 {
5495         u32 data, orig;
5496
5497         orig = data = RREG32(RLC_PG_CNTL);
5498         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5499                 data &= ~DISABLE_CP_PG;
5500         else
5501                 data |= DISABLE_CP_PG;
5502         if (orig != data)
5503                 WREG32(RLC_PG_CNTL, data);
5504 }
5505
5506 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5507 {
5508         u32 data, orig;
5509
5510         orig = data = RREG32(RLC_PG_CNTL);
5511         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5512                 data &= ~DISABLE_GDS_PG;
5513         else
5514                 data |= DISABLE_GDS_PG;
5515         if (orig != data)
5516                 WREG32(RLC_PG_CNTL, data);
5517 }
5518
/* size (dwords) of each CP power-gating register table and the dword
 * offsets at which the tables sit inside the CP firmware images.
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* cik_init_cp_pg_table - copy the CP power-gating tables into the RLC buffer
 *
 * @rdev: radeon_device pointer
 *
 * Copies CP_ME_TABLE_SIZE dwords out of each CP firmware image (CE,
 * PFP, ME, then the MEC image(s) — KAVERI has a second MEC, hence
 * max_me = 5 there) into consecutive slots of the cp_table buffer,
 * converting from the firmware's big-endian layout.
 *
 * NOTE(review): assumes ce_fw/pfp_fw/me_fw/mec_fw are already loaded
 * when this runs — verify against the callers.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			/* me 3+ : compute micro engine (MEC) firmware */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
5560
/* cik_enable_gfx_cgpg - enable/disable GFX coarse-grain power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Toggles GFX_PG_ENABLE and AUTO_PG_EN together.  On disable, a read
 * of DB_RENDER_CONTROL is performed with the result discarded —
 * presumably a wake/flush dummy read; confirm before removing.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* dummy read; value intentionally unused */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5590
/* cik_get_cu_active_bitmap - compute the active-CU bitmap for one SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Reads the hw- and user-disabled CU configuration for the selected
 * SE/SH, merges the disabled bits (the field of interest appears to
 * live in the upper 16 bits of CC_GC_SHADER_ARRAY_CONFIG), inverts
 * them, and masks to max_cu_per_sh bits so that set bits mean "CU
 * active".  Restores broadcast SE/SH selection before returning.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* keep the hw disabled-CU field, merge user bits, shift down */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh low bits */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
5613
5614 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5615 {
5616         u32 i, j, k, active_cu_number = 0;
5617         u32 mask, counter, cu_bitmap;
5618         u32 tmp = 0;
5619
5620         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5621                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5622                         mask = 1;
5623                         cu_bitmap = 0;
5624                         counter = 0;
5625                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5626                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5627                                         if (counter < 2)
5628                                                 cu_bitmap |= mask;
5629                                         counter ++;
5630                                 }
5631                                 mask <<= 1;
5632                         }
5633
5634                         active_cu_number += counter;
5635                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5636                 }
5637         }
5638
5639         WREG32(RLC_PG_AO_CU_MASK, tmp);
5640
5641         tmp = RREG32(RLC_MAX_PG_CU);
5642         tmp &= ~MAX_PU_CU_MASK;
5643         tmp |= MAX_PU_CU(active_cu_number);
5644         WREG32(RLC_MAX_PG_CU, tmp);
5645 }
5646
5647 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5648                                        bool enable)
5649 {
5650         u32 data, orig;
5651
5652         orig = data = RREG32(RLC_PG_CNTL);
5653         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5654                 data |= STATIC_PER_CU_PG_ENABLE;
5655         else
5656                 data &= ~STATIC_PER_CU_PG_ENABLE;
5657         if (orig != data)
5658                 WREG32(RLC_PG_CNTL, data);
5659 }
5660
5661 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5662                                         bool enable)
5663 {
5664         u32 data, orig;
5665
5666         orig = data = RREG32(RLC_PG_CNTL);
5667         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5668                 data |= DYN_PER_CU_PG_ENABLE;
5669         else
5670                 data &= ~DYN_PER_CU_PG_ENABLE;
5671         if (orig != data)
5672                 WREG32(RLC_PG_CNTL, data);
5673 }
5674
/* dword offsets into the RLC GPM scratch area */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* cik_init_gfx_cgpg - program the RLC state needed for GFX power gating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor (address + size, or zeros when no
 * cs_data is available) and the save/restore register list into RLC
 * scratch, points the RLC at the save/restore and cp_table buffers,
 * and tunes the poll/delay/idle-threshold registers.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are 256-byte aligned; registers take address >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5726
/* cik_update_gfx_pg - toggle GFX CGPG plus static/dynamic per-CU MGPG */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5733
5734 u32 cik_get_csb_size(struct radeon_device *rdev)
5735 {
5736         u32 count = 0;
5737         const struct cs_section_def *sect = NULL;
5738         const struct cs_extent_def *ext = NULL;
5739
5740         if (rdev->rlc.cs_data == NULL)
5741                 return 0;
5742
5743         /* begin clear state */
5744         count += 2;
5745         /* context control state */
5746         count += 3;
5747
5748         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5749                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5750                         if (sect->id == SECT_CONTEXT)
5751                                 count += 2 + ext->reg_count;
5752                         else
5753                                 return 0;
5754                 }
5755         }
5756         /* pa_sc_raster_config/pa_sc_raster_config1 */
5757         count += 4;
5758         /* end clear state */
5759         count += 2;
5760         /* clear state */
5761         count += 2;
5762
5763         return count;
5764 }
5765
/* cik_get_csb_buffer - emit the clear-state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination (at least cik_get_csb_size() dwords)
 *
 * Writes the PM4 packet stream that sets up the clear state: preamble
 * begin, context control, the SECT_CONTEXT register extents from
 * cs_data, the per-family PA_SC_RASTER_CONFIG pair, preamble end, and
 * a CLEAR_STATE packet.  The dword layout must match
 * cik_get_csb_size() exactly.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				/* context register index is relative to 0xa000 */
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* cik_get_csb_size() returned 0 in this case */
				return;
			}
		}
	}

	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	/* per-family raster config values */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = 0x16000012;
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KAVERI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KABINI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		buffer[count++] = 0x00000000;
		break;
	}

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5824
5825 static void cik_init_pg(struct radeon_device *rdev)
5826 {
5827         if (rdev->pg_flags) {
5828                 cik_enable_sck_slowdown_on_pu(rdev, true);
5829                 cik_enable_sck_slowdown_on_pd(rdev, true);
5830                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5831                         cik_init_gfx_cgpg(rdev);
5832                         cik_enable_cp_pg(rdev, true);
5833                         cik_enable_gds_pg(rdev, true);
5834                 }
5835                 cik_init_ao_cu_mask(rdev);
5836                 cik_update_gfx_pg(rdev, true);
5837         }
5838 }
5839
5840 static void cik_fini_pg(struct radeon_device *rdev)
5841 {
5842         if (rdev->pg_flags) {
5843                 cik_update_gfx_pg(rdev, false);
5844                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5845                         cik_enable_cp_pg(rdev, false);
5846                         cik_enable_gds_pg(rdev, false);
5847                 }
5848         }
5849 }
5850
5851 /*
5852  * Interrupts
5853  * Starting with r6xx, interrupts are handled via a ring buffer.
5854  * Ring buffers are areas of GPU accessible memory that the GPU
5855  * writes interrupt vectors into and the host reads vectors out of.
5856  * There is a rptr (read pointer) that determines where the
5857  * host is currently reading, and a wptr (write pointer)
5858  * which determines where the GPU has written.  When the
5859  * pointers are equal, the ring is idle.  When the GPU
5860  * writes vectors to the ring buffer, it increments the
5861  * wptr.  When there is an interrupt, the host then starts
5862  * fetching commands and processing them until the pointers are
5863  * equal again at which point it updates the rptr.
5864  */
5865
5866 /**
5867  * cik_enable_interrupts - Enable the interrupt ring buffer
5868  *
5869  * @rdev: radeon_device pointer
5870  *
5871  * Enable the interrupt ring buffer (CIK).
5872  */
5873 static void cik_enable_interrupts(struct radeon_device *rdev)
5874 {
5875         u32 ih_cntl = RREG32(IH_CNTL);
5876         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5877
5878         ih_cntl |= ENABLE_INTR;
5879         ih_rb_cntl |= IH_RB_ENABLE;
5880         WREG32(IH_CNTL, ih_cntl);
5881         WREG32(IH_RB_CNTL, ih_rb_cntl);
5882         rdev->ih.enabled = true;
5883 }
5884
5885 /**
5886  * cik_disable_interrupts - Disable the interrupt ring buffer
5887  *
5888  * @rdev: radeon_device pointer
5889  *
5890  * Disable the interrupt ring buffer (CIK).
5891  */
5892 static void cik_disable_interrupts(struct radeon_device *rdev)
5893 {
5894         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5895         u32 ih_cntl = RREG32(IH_CNTL);
5896
5897         ih_rb_cntl &= ~IH_RB_ENABLE;
5898         ih_cntl &= ~ENABLE_INTR;
5899         WREG32(IH_RB_CNTL, ih_rb_cntl);
5900         WREG32(IH_CNTL, ih_cntl);
5901         /* set rptr, wptr to 0 */
5902         WREG32(IH_RB_RPTR, 0);
5903         WREG32(IH_RB_WPTR, 0);
5904         rdev->ih.enabled = false;
5905         rdev->ih.rptr = 0;
5906 }
5907
5908 /**
5909  * cik_disable_interrupt_state - Disable all interrupt sources
5910  *
5911  * @rdev: radeon_device pointer
5912  *
5913  * Clear all interrupt enable bits used by the driver (CIK).
5914  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	/* keep only the context busy/empty enables; all other sources cleared */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	/* clear the trap enable on both SDMA engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	/* ME1 and ME2 each own four compute pipes; zero every pipe's int cntl */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	/* preserve only the configured HPD interrupt polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
5969
5970 /**
5971  * cik_irq_init - init and enable the interrupt ring
5972  *
5973  * @rdev: radeon_device pointer
5974  *
5975  * Allocate a ring buffer for the interrupt controller,
5976  * enable the RLC, disable interrupts, enable the IH
5977  * ring buffer and enable it (CIK).
 * Called at device load and resume.
5979  * Returns 0 for success, errors for failure.
5980  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up: release the IH ring we just allocated */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* hw wants the ring size as log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6051
6052 /**
6053  * cik_irq_set - enable/disable interrupt sources
6054  *
6055  * @rdev: radeon_device pointer
6056  *
6057  * Enable interrupt sources on the GPU (vblanks, hpd,
6058  * etc.) (CIK).
6059  * Returns 0 for success, errors for failure.
6060  */
6061 int cik_irq_set(struct radeon_device *rdev)
6062 {
6063         u32 cp_int_cntl;
6064         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6065         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6066         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6067         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6068         u32 grbm_int_cntl = 0;
6069         u32 dma_cntl, dma_cntl1;
6070         u32 thermal_int;
6071
6072         if (!rdev->irq.installed) {
6073                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6074                 return -EINVAL;
6075         }
6076         /* don't enable anything if the ih is disabled */
6077         if (!rdev->ih.enabled) {
6078                 cik_disable_interrupts(rdev);
6079                 /* force the active interrupt state to all disabled */
6080                 cik_disable_interrupt_state(rdev);
6081                 return 0;
6082         }
6083
6084         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6085                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6086         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6087
6088         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6089         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6090         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6091         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6092         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6093         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6094
6095         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6096         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6097
6098         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6100         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6101         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6102         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6103         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6104         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6105         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6106
6107         if (rdev->flags & RADEON_IS_IGP)
6108                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6109                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6110         else
6111                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6112                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6113
6114         /* enable CP interrupts on all rings */
6115         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6116                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6117                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6118         }
6119         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6120                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6121                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6122                 if (ring->me == 1) {
6123                         switch (ring->pipe) {
6124                         case 0:
6125                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6126                                 break;
6127                         case 1:
6128                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6129                                 break;
6130                         case 2:
6131                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6132                                 break;
6133                         case 3:
6134                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6135                                 break;
6136                         default:
6137                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6138                                 break;
6139                         }
6140                 } else if (ring->me == 2) {
6141                         switch (ring->pipe) {
6142                         case 0:
6143                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6144                                 break;
6145                         case 1:
6146                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6147                                 break;
6148                         case 2:
6149                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6150                                 break;
6151                         case 3:
6152                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6153                                 break;
6154                         default:
6155                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6156                                 break;
6157                         }
6158                 } else {
6159                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6160                 }
6161         }
6162         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6163                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6164                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6165                 if (ring->me == 1) {
6166                         switch (ring->pipe) {
6167                         case 0:
6168                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6169                                 break;
6170                         case 1:
6171                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6172                                 break;
6173                         case 2:
6174                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6175                                 break;
6176                         case 3:
6177                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6178                                 break;
6179                         default:
6180                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6181                                 break;
6182                         }
6183                 } else if (ring->me == 2) {
6184                         switch (ring->pipe) {
6185                         case 0:
6186                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6187                                 break;
6188                         case 1:
6189                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6190                                 break;
6191                         case 2:
6192                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6193                                 break;
6194                         case 3:
6195                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6196                                 break;
6197                         default:
6198                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6199                                 break;
6200                         }
6201                 } else {
6202                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6203                 }
6204         }
6205
6206         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6207                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6208                 dma_cntl |= TRAP_ENABLE;
6209         }
6210
6211         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6212                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6213                 dma_cntl1 |= TRAP_ENABLE;
6214         }
6215
6216         if (rdev->irq.crtc_vblank_int[0] ||
6217             atomic_read(&rdev->irq.pflip[0])) {
6218                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6219                 crtc1 |= VBLANK_INTERRUPT_MASK;
6220         }
6221         if (rdev->irq.crtc_vblank_int[1] ||
6222             atomic_read(&rdev->irq.pflip[1])) {
6223                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6224                 crtc2 |= VBLANK_INTERRUPT_MASK;
6225         }
6226         if (rdev->irq.crtc_vblank_int[2] ||
6227             atomic_read(&rdev->irq.pflip[2])) {
6228                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6229                 crtc3 |= VBLANK_INTERRUPT_MASK;
6230         }
6231         if (rdev->irq.crtc_vblank_int[3] ||
6232             atomic_read(&rdev->irq.pflip[3])) {
6233                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6234                 crtc4 |= VBLANK_INTERRUPT_MASK;
6235         }
6236         if (rdev->irq.crtc_vblank_int[4] ||
6237             atomic_read(&rdev->irq.pflip[4])) {
6238                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6239                 crtc5 |= VBLANK_INTERRUPT_MASK;
6240         }
6241         if (rdev->irq.crtc_vblank_int[5] ||
6242             atomic_read(&rdev->irq.pflip[5])) {
6243                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6244                 crtc6 |= VBLANK_INTERRUPT_MASK;
6245         }
6246         if (rdev->irq.hpd[0]) {
6247                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6248                 hpd1 |= DC_HPDx_INT_EN;
6249         }
6250         if (rdev->irq.hpd[1]) {
6251                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6252                 hpd2 |= DC_HPDx_INT_EN;
6253         }
6254         if (rdev->irq.hpd[2]) {
6255                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6256                 hpd3 |= DC_HPDx_INT_EN;
6257         }
6258         if (rdev->irq.hpd[3]) {
6259                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6260                 hpd4 |= DC_HPDx_INT_EN;
6261         }
6262         if (rdev->irq.hpd[4]) {
6263                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6264                 hpd5 |= DC_HPDx_INT_EN;
6265         }
6266         if (rdev->irq.hpd[5]) {
6267                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6268                 hpd6 |= DC_HPDx_INT_EN;
6269         }
6270
6271         if (rdev->irq.dpm_thermal) {
6272                 DRM_DEBUG("dpm thermal\n");
6273                 if (rdev->flags & RADEON_IS_IGP)
6274                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6275                 else
6276                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6277         }
6278
6279         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6280
6281         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6282         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6283
6284         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6285         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6286         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6287         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6288         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6289         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6290         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6291         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6292
6293         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6294
6295         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6296         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6297         if (rdev->num_crtc >= 4) {
6298                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6299                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6300         }
6301         if (rdev->num_crtc >= 6) {
6302                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6303                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6304         }
6305
6306         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6307         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6308         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6309         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6310         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6311         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6312
6313         if (rdev->flags & RADEON_IS_IGP)
6314                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6315         else
6316                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6317
6318         return 0;
6319 }
6320
6321 /**
6322  * cik_irq_ack - ack interrupt sources
6323  *
6324  * @rdev: radeon_device pointer
6325  *
6326  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
6328  * generated and do not require an explicit ack.
6329  */
6330 static inline void cik_irq_ack(struct radeon_device *rdev)
6331 {
6332         u32 tmp;
6333
6334         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6335         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6336         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6337         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6338         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6339         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6340         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6341
6342         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6343                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6344         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6345                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6346         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6347                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6348         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6349                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6350
6351         if (rdev->num_crtc >= 4) {
6352                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6353                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6354                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6355                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6356                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6357                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6358                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6359                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6360         }
6361
6362         if (rdev->num_crtc >= 6) {
6363                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6364                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6365                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6366                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6367                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6368                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6369                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6370                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6371         }
6372
6373         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6374                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6375                 tmp |= DC_HPDx_INT_ACK;
6376                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6377         }
6378         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6379                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6380                 tmp |= DC_HPDx_INT_ACK;
6381                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6382         }
6383         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6384                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6385                 tmp |= DC_HPDx_INT_ACK;
6386                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6387         }
6388         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6389                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6390                 tmp |= DC_HPDx_INT_ACK;
6391                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6392         }
6393         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6394                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6395                 tmp |= DC_HPDx_INT_ACK;
6396                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6397         }
6398         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6399                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6400                 tmp |= DC_HPDx_INT_ACK;
6401                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6402         }
6403 }
6404
6405 /**
6406  * cik_irq_disable - disable interrupts
6407  *
6408  * @rdev: radeon_device pointer
6409  *
6410  * Disable interrupts on the hw (CIK).
6411  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	/* give any in-flight interrupt time to land before acking */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
6420
6421 /**
 * cik_irq_suspend - disable interrupts for suspend
6423  *
6424  * @rdev: radeon_device pointer
6425  *
6426  * Disable interrupts and stop the RLC (CIK).
6427  * Used for suspend.
6428  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* quiesce interrupts first, then stop the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
6434
6435 /**
6436  * cik_irq_fini - tear down interrupt support
6437  *
6438  * @rdev: radeon_device pointer
6439  *
6440  * Disable interrupts on the hw and free the IH ring
6441  * buffer (CIK).
6442  * Used for driver unload.
6443  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* disable irqs/RLC before freeing the IH ring they feed */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6449
6450 /**
6451  * cik_get_ih_wptr - get the IH ring buffer wptr
6452  *
6453  * @rdev: radeon_device pointer
6454  *
6455  * Get the IH ring buffer wptr from either the register
6456  * or the writeback memory buffer (CIK).  Also check for
6457  * ring buffer overflow and deal with it.
6458  * Used by cik_irq_process().
6459  * Returns the value of the wptr.
6460  */
6461 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6462 {
6463         u32 wptr, tmp;
6464
6465         if (rdev->wb.enabled)
6466                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6467         else
6468                 wptr = RREG32(IH_RB_WPTR);
6469
6470         if (wptr & RB_OVERFLOW) {
6471                 /* When a ring buffer overflow happen start parsing interrupt
6472                  * from the last not overwritten vector (wptr + 16). Hopefully
6473                  * this should allow us to catchup.
6474                  */
6475                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6476                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6477                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6478                 tmp = RREG32(IH_RB_CNTL);
6479                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6480                 WREG32(IH_RB_CNTL, tmp);
6481         }
6482         return (wptr & rdev->ih.ptr_mask);
6483 }
6484
6485 /*        CIK IV Ring
6486  * Each IV ring entry is 128 bits:
6487  * [7:0]    - interrupt source id
6488  * [31:8]   - reserved
6489  * [59:32]  - interrupt source data
6490  * [63:60]  - reserved
6491  * [71:64]  - RINGID
6492  *            CP:
6493  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6494  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6495  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6496  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6497  *            PIPE_ID - ME0 0=3D
6498  *                    - ME1&2 compute dispatcher (4 pipes each)
6499  *            SDMA:
6500  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6501  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6502  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6503  * [79:72]  - VMID
6504  * [95:80]  - PASID
6505  * [127:96] - reserved
6506  */
6507 /**
6508  * cik_irq_process - interrupt handler
6509  *
6510  * @rdev: radeon_device pointer
6511  *
6512  * Interrupt hander (CIK).  Walk the IH ring,
6513  * ack interrupts and schedule work to handle
6514  * interrupt events.
6515  * Returns irq process return code.
6516  */
6517 int cik_irq_process(struct radeon_device *rdev)
6518 {
6519         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6520         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6521         u32 wptr;
6522         u32 rptr;
6523         u32 src_id, src_data, ring_id;
6524         u8 me_id, pipe_id, queue_id;
6525         u32 ring_index;
6526         bool queue_hotplug = false;
6527         bool queue_reset = false;
6528         u32 addr, status, mc_client;
6529         bool queue_thermal = false;
6530
6531         if (!rdev->ih.enabled || rdev->shutdown)
6532                 return IRQ_NONE;
6533
6534         wptr = cik_get_ih_wptr(rdev);
6535
6536 restart_ih:
6537         /* is somebody else already processing irqs? */
6538         if (atomic_xchg(&rdev->ih.lock, 1))
6539                 return IRQ_NONE;
6540
6541         rptr = rdev->ih.rptr;
6542         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6543
6544         /* Order reading of wptr vs. reading of IH ring data */
6545         rmb();
6546
6547         /* display interrupts */
6548         cik_irq_ack(rdev);
6549
6550         while (rptr != wptr) {
6551                 /* wptr/rptr are in bytes! */
6552                 ring_index = rptr / 4;
6553                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6554                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6555                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6556
6557                 switch (src_id) {
6558                 case 1: /* D1 vblank/vline */
6559                         switch (src_data) {
6560                         case 0: /* D1 vblank */
6561                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6562                                         if (rdev->irq.crtc_vblank_int[0]) {
6563                                                 drm_handle_vblank(rdev->ddev, 0);
6564                                                 rdev->pm.vblank_sync = true;
6565                                                 wake_up(&rdev->irq.vblank_queue);
6566                                         }
6567                                         if (atomic_read(&rdev->irq.pflip[0]))
6568                                                 radeon_crtc_handle_flip(rdev, 0);
6569                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6570                                         DRM_DEBUG("IH: D1 vblank\n");
6571                                 }
6572                                 break;
6573                         case 1: /* D1 vline */
6574                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6575                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6576                                         DRM_DEBUG("IH: D1 vline\n");
6577                                 }
6578                                 break;
6579                         default:
6580                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6581                                 break;
6582                         }
6583                         break;
6584                 case 2: /* D2 vblank/vline */
6585                         switch (src_data) {
6586                         case 0: /* D2 vblank */
6587                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6588                                         if (rdev->irq.crtc_vblank_int[1]) {
6589                                                 drm_handle_vblank(rdev->ddev, 1);
6590                                                 rdev->pm.vblank_sync = true;
6591                                                 wake_up(&rdev->irq.vblank_queue);
6592                                         }
6593                                         if (atomic_read(&rdev->irq.pflip[1]))
6594                                                 radeon_crtc_handle_flip(rdev, 1);
6595                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6596                                         DRM_DEBUG("IH: D2 vblank\n");
6597                                 }
6598                                 break;
6599                         case 1: /* D2 vline */
6600                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6601                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6602                                         DRM_DEBUG("IH: D2 vline\n");
6603                                 }
6604                                 break;
6605                         default:
6606                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6607                                 break;
6608                         }
6609                         break;
6610                 case 3: /* D3 vblank/vline */
6611                         switch (src_data) {
6612                         case 0: /* D3 vblank */
6613                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6614                                         if (rdev->irq.crtc_vblank_int[2]) {
6615                                                 drm_handle_vblank(rdev->ddev, 2);
6616                                                 rdev->pm.vblank_sync = true;
6617                                                 wake_up(&rdev->irq.vblank_queue);
6618                                         }
6619                                         if (atomic_read(&rdev->irq.pflip[2]))
6620                                                 radeon_crtc_handle_flip(rdev, 2);
6621                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6622                                         DRM_DEBUG("IH: D3 vblank\n");
6623                                 }
6624                                 break;
6625                         case 1: /* D3 vline */
6626                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6627                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6628                                         DRM_DEBUG("IH: D3 vline\n");
6629                                 }
6630                                 break;
6631                         default:
6632                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6633                                 break;
6634                         }
6635                         break;
6636                 case 4: /* D4 vblank/vline */
6637                         switch (src_data) {
6638                         case 0: /* D4 vblank */
6639                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6640                                         if (rdev->irq.crtc_vblank_int[3]) {
6641                                                 drm_handle_vblank(rdev->ddev, 3);
6642                                                 rdev->pm.vblank_sync = true;
6643                                                 wake_up(&rdev->irq.vblank_queue);
6644                                         }
6645                                         if (atomic_read(&rdev->irq.pflip[3]))
6646                                                 radeon_crtc_handle_flip(rdev, 3);
6647                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6648                                         DRM_DEBUG("IH: D4 vblank\n");
6649                                 }
6650                                 break;
6651                         case 1: /* D4 vline */
6652                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6653                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6654                                         DRM_DEBUG("IH: D4 vline\n");
6655                                 }
6656                                 break;
6657                         default:
6658                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6659                                 break;
6660                         }
6661                         break;
6662                 case 5: /* D5 vblank/vline */
6663                         switch (src_data) {
6664                         case 0: /* D5 vblank */
6665                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6666                                         if (rdev->irq.crtc_vblank_int[4]) {
6667                                                 drm_handle_vblank(rdev->ddev, 4);
6668                                                 rdev->pm.vblank_sync = true;
6669                                                 wake_up(&rdev->irq.vblank_queue);
6670                                         }
6671                                         if (atomic_read(&rdev->irq.pflip[4]))
6672                                                 radeon_crtc_handle_flip(rdev, 4);
6673                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6674                                         DRM_DEBUG("IH: D5 vblank\n");
6675                                 }
6676                                 break;
6677                         case 1: /* D5 vline */
6678                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6679                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6680                                         DRM_DEBUG("IH: D5 vline\n");
6681                                 }
6682                                 break;
6683                         default:
6684                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6685                                 break;
6686                         }
6687                         break;
6688                 case 6: /* D6 vblank/vline */
6689                         switch (src_data) {
6690                         case 0: /* D6 vblank */
6691                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6692                                         if (rdev->irq.crtc_vblank_int[5]) {
6693                                                 drm_handle_vblank(rdev->ddev, 5);
6694                                                 rdev->pm.vblank_sync = true;
6695                                                 wake_up(&rdev->irq.vblank_queue);
6696                                         }
6697                                         if (atomic_read(&rdev->irq.pflip[5]))
6698                                                 radeon_crtc_handle_flip(rdev, 5);
6699                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6700                                         DRM_DEBUG("IH: D6 vblank\n");
6701                                 }
6702                                 break;
6703                         case 1: /* D6 vline */
6704                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6705                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6706                                         DRM_DEBUG("IH: D6 vline\n");
6707                                 }
6708                                 break;
6709                         default:
6710                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6711                                 break;
6712                         }
6713                         break;
6714                 case 42: /* HPD hotplug */
6715                         switch (src_data) {
6716                         case 0:
6717                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6718                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6719                                         queue_hotplug = true;
6720                                         DRM_DEBUG("IH: HPD1\n");
6721                                 }
6722                                 break;
6723                         case 1:
6724                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6725                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6726                                         queue_hotplug = true;
6727                                         DRM_DEBUG("IH: HPD2\n");
6728                                 }
6729                                 break;
6730                         case 2:
6731                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6732                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6733                                         queue_hotplug = true;
6734                                         DRM_DEBUG("IH: HPD3\n");
6735                                 }
6736                                 break;
6737                         case 3:
6738                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6739                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6740                                         queue_hotplug = true;
6741                                         DRM_DEBUG("IH: HPD4\n");
6742                                 }
6743                                 break;
6744                         case 4:
6745                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6746                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6747                                         queue_hotplug = true;
6748                                         DRM_DEBUG("IH: HPD5\n");
6749                                 }
6750                                 break;
6751                         case 5:
6752                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6753                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6754                                         queue_hotplug = true;
6755                                         DRM_DEBUG("IH: HPD6\n");
6756                                 }
6757                                 break;
6758                         default:
6759                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6760                                 break;
6761                         }
6762                         break;
6763                 case 124: /* UVD */
6764                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6765                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6766                         break;
6767                 case 146:
6768                 case 147:
6769                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6770                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6771                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6772                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6773                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6774                                 addr);
6775                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6776                                 status);
6777                         cik_vm_decode_fault(rdev, status, addr, mc_client);
6778                         /* reset addr and status */
6779                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6780                         break;
6781                 case 176: /* GFX RB CP_INT */
6782                 case 177: /* GFX IB CP_INT */
6783                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6784                         break;
6785                 case 181: /* CP EOP event */
6786                         DRM_DEBUG("IH: CP EOP\n");
6787                         /* XXX check the bitfield order! */
6788                         me_id = (ring_id & 0x60) >> 5;
6789                         pipe_id = (ring_id & 0x18) >> 3;
6790                         queue_id = (ring_id & 0x7) >> 0;
6791                         switch (me_id) {
6792                         case 0:
6793                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6794                                 break;
6795                         case 1:
6796                         case 2:
6797                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6798                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6799                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6800                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6801                                 break;
6802                         }
6803                         break;
6804                 case 184: /* CP Privileged reg access */
6805                         DRM_ERROR("Illegal register access in command stream\n");
6806                         /* XXX check the bitfield order! */
6807                         me_id = (ring_id & 0x60) >> 5;
6808                         pipe_id = (ring_id & 0x18) >> 3;
6809                         queue_id = (ring_id & 0x7) >> 0;
6810                         switch (me_id) {
6811                         case 0:
6812                                 /* This results in a full GPU reset, but all we need to do is soft
6813                                  * reset the CP for gfx
6814                                  */
6815                                 queue_reset = true;
6816                                 break;
6817                         case 1:
6818                                 /* XXX compute */
6819                                 queue_reset = true;
6820                                 break;
6821                         case 2:
6822                                 /* XXX compute */
6823                                 queue_reset = true;
6824                                 break;
6825                         }
6826                         break;
6827                 case 185: /* CP Privileged inst */
6828                         DRM_ERROR("Illegal instruction in command stream\n");
6829                         /* XXX check the bitfield order! */
6830                         me_id = (ring_id & 0x60) >> 5;
6831                         pipe_id = (ring_id & 0x18) >> 3;
6832                         queue_id = (ring_id & 0x7) >> 0;
6833                         switch (me_id) {
6834                         case 0:
6835                                 /* This results in a full GPU reset, but all we need to do is soft
6836                                  * reset the CP for gfx
6837                                  */
6838                                 queue_reset = true;
6839                                 break;
6840                         case 1:
6841                                 /* XXX compute */
6842                                 queue_reset = true;
6843                                 break;
6844                         case 2:
6845                                 /* XXX compute */
6846                                 queue_reset = true;
6847                                 break;
6848                         }
6849                         break;
6850                 case 224: /* SDMA trap event */
6851                         /* XXX check the bitfield order! */
6852                         me_id = (ring_id & 0x3) >> 0;
6853                         queue_id = (ring_id & 0xc) >> 2;
6854                         DRM_DEBUG("IH: SDMA trap\n");
6855                         switch (me_id) {
6856                         case 0:
6857                                 switch (queue_id) {
6858                                 case 0:
6859                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6860                                         break;
6861                                 case 1:
6862                                         /* XXX compute */
6863                                         break;
6864                                 case 2:
6865                                         /* XXX compute */
6866                                         break;
6867                                 }
6868                                 break;
6869                         case 1:
6870                                 switch (queue_id) {
6871                                 case 0:
6872                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6873                                         break;
6874                                 case 1:
6875                                         /* XXX compute */
6876                                         break;
6877                                 case 2:
6878                                         /* XXX compute */
6879                                         break;
6880                                 }
6881                                 break;
6882                         }
6883                         break;
6884                 case 230: /* thermal low to high */
6885                         DRM_DEBUG("IH: thermal low to high\n");
6886                         rdev->pm.dpm.thermal.high_to_low = false;
6887                         queue_thermal = true;
6888                         break;
6889                 case 231: /* thermal high to low */
6890                         DRM_DEBUG("IH: thermal high to low\n");
6891                         rdev->pm.dpm.thermal.high_to_low = true;
6892                         queue_thermal = true;
6893                         break;
6894                 case 233: /* GUI IDLE */
6895                         DRM_DEBUG("IH: GUI idle\n");
6896                         break;
6897                 case 241: /* SDMA Privileged inst */
6898                 case 247: /* SDMA Privileged inst */
6899                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
6900                         /* XXX check the bitfield order! */
6901                         me_id = (ring_id & 0x3) >> 0;
6902                         queue_id = (ring_id & 0xc) >> 2;
6903                         switch (me_id) {
6904                         case 0:
6905                                 switch (queue_id) {
6906                                 case 0:
6907                                         queue_reset = true;
6908                                         break;
6909                                 case 1:
6910                                         /* XXX compute */
6911                                         queue_reset = true;
6912                                         break;
6913                                 case 2:
6914                                         /* XXX compute */
6915                                         queue_reset = true;
6916                                         break;
6917                                 }
6918                                 break;
6919                         case 1:
6920                                 switch (queue_id) {
6921                                 case 0:
6922                                         queue_reset = true;
6923                                         break;
6924                                 case 1:
6925                                         /* XXX compute */
6926                                         queue_reset = true;
6927                                         break;
6928                                 case 2:
6929                                         /* XXX compute */
6930                                         queue_reset = true;
6931                                         break;
6932                                 }
6933                                 break;
6934                         }
6935                         break;
6936                 default:
6937                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6938                         break;
6939                 }
6940
6941                 /* wptr/rptr are in bytes! */
6942                 rptr += 16;
6943                 rptr &= rdev->ih.ptr_mask;
6944         }
6945         if (queue_hotplug)
6946                 schedule_work(&rdev->hotplug_work);
6947         if (queue_reset)
6948                 schedule_work(&rdev->reset_work);
6949         if (queue_thermal)
6950                 schedule_work(&rdev->pm.dpm.thermal.work);
6951         rdev->ih.rptr = rptr;
6952         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6953         atomic_set(&rdev->ih.lock, 0);
6954
6955         /* make sure wptr hasn't changed while processing */
6956         wptr = cik_get_ih_wptr(rdev);
6957         if (wptr != rptr)
6958                 goto restart_ih;
6959
6960         return IRQ_HANDLED;
6961 }
6962
6963 /*
6964  * startup/shutdown callbacks
6965  */
6966 /**
6967  * cik_startup - program the asic to a functional state
6968  *
6969  * @rdev: radeon_device pointer
6970  *
6971  * Programs the asic to a functional state (CIK).
6972  * Called by cik_init() and cik_resume().
6973  * Returns 0 for success, error for failure.
6974  */
6975 static int cik_startup(struct radeon_device *rdev)
6976 {
6977         struct radeon_ring *ring;
6978         int r;
6979
6980         /* enable pcie gen2/3 link */
6981         cik_pcie_gen3_enable(rdev);
6982         /* enable aspm */
6983         cik_program_aspm(rdev);
6984
6985         /* scratch needs to be initialized before MC */
6986         r = r600_vram_scratch_init(rdev);
6987         if (r)
6988                 return r;
6989
6990         cik_mc_program(rdev);
6991
6992         if (rdev->flags & RADEON_IS_IGP) {
6993                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6994                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6995                         r = cik_init_microcode(rdev);
6996                         if (r) {
6997                                 DRM_ERROR("Failed to load firmware!\n");
6998                                 return r;
6999                         }
7000                 }
7001         } else {
7002                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7003                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7004                     !rdev->mc_fw) {
7005                         r = cik_init_microcode(rdev);
7006                         if (r) {
7007                                 DRM_ERROR("Failed to load firmware!\n");
7008                                 return r;
7009                         }
7010                 }
7011
7012                 r = ci_mc_load_microcode(rdev);
7013                 if (r) {
7014                         DRM_ERROR("Failed to load MC firmware!\n");
7015                         return r;
7016                 }
7017         }
7018
7019         r = cik_pcie_gart_enable(rdev);
7020         if (r)
7021                 return r;
7022         cik_gpu_init(rdev);
7023
7024         /* allocate rlc buffers */
7025         if (rdev->flags & RADEON_IS_IGP) {
7026                 if (rdev->family == CHIP_KAVERI) {
7027                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7028                         rdev->rlc.reg_list_size =
7029                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7030                 } else {
7031                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7032                         rdev->rlc.reg_list_size =
7033                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7034                 }
7035         }
7036         rdev->rlc.cs_data = ci_cs_data;
7037         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7038         r = sumo_rlc_init(rdev);
7039         if (r) {
7040                 DRM_ERROR("Failed to init rlc BOs!\n");
7041                 return r;
7042         }
7043
7044         /* allocate wb buffer */
7045         r = radeon_wb_init(rdev);
7046         if (r)
7047                 return r;
7048
7049         /* allocate mec buffers */
7050         r = cik_mec_init(rdev);
7051         if (r) {
7052                 DRM_ERROR("Failed to init MEC BOs!\n");
7053                 return r;
7054         }
7055
7056         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7057         if (r) {
7058                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7059                 return r;
7060         }
7061
7062         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7063         if (r) {
7064                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7065                 return r;
7066         }
7067
7068         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7069         if (r) {
7070                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7071                 return r;
7072         }
7073
7074         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7075         if (r) {
7076                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7077                 return r;
7078         }
7079
7080         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7081         if (r) {
7082                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7083                 return r;
7084         }
7085
7086         r = radeon_uvd_resume(rdev);
7087         if (!r) {
7088                 r = uvd_v4_2_resume(rdev);
7089                 if (!r) {
7090                         r = radeon_fence_driver_start_ring(rdev,
7091                                                            R600_RING_TYPE_UVD_INDEX);
7092                         if (r)
7093                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7094                 }
7095         }
7096         if (r)
7097                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7098
7099         /* Enable IRQ */
7100         if (!rdev->irq.installed) {
7101                 r = radeon_irq_kms_init(rdev);
7102                 if (r)
7103                         return r;
7104         }
7105
7106         r = cik_irq_init(rdev);
7107         if (r) {
7108                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7109                 radeon_irq_kms_fini(rdev);
7110                 return r;
7111         }
7112         cik_irq_set(rdev);
7113
7114         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7115         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7116                              CP_RB0_RPTR, CP_RB0_WPTR,
7117                              RADEON_CP_PACKET2);
7118         if (r)
7119                 return r;
7120
7121         /* set up the compute queues */
7122         /* type-2 packets are deprecated on MEC, use type-3 instead */
7123         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7124         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7125                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7126                              PACKET3(PACKET3_NOP, 0x3FFF));
7127         if (r)
7128                 return r;
7129         ring->me = 1; /* first MEC */
7130         ring->pipe = 0; /* first pipe */
7131         ring->queue = 0; /* first queue */
7132         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7133
7134         /* type-2 packets are deprecated on MEC, use type-3 instead */
7135         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7136         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7137                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7138                              PACKET3(PACKET3_NOP, 0x3FFF));
7139         if (r)
7140                 return r;
7141         /* dGPU only have 1 MEC */
7142         ring->me = 1; /* first MEC */
7143         ring->pipe = 0; /* first pipe */
7144         ring->queue = 1; /* second queue */
7145         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7146
7147         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7148         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7149                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7150                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7151                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7152         if (r)
7153                 return r;
7154
7155         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7156         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7157                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7158                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7159                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7160         if (r)
7161                 return r;
7162
7163         r = cik_cp_resume(rdev);
7164         if (r)
7165                 return r;
7166
7167         r = cik_sdma_resume(rdev);
7168         if (r)
7169                 return r;
7170
7171         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7172         if (ring->ring_size) {
7173                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7174                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7175                                      RADEON_CP_PACKET2);
7176                 if (!r)
7177                         r = uvd_v1_0_init(rdev);
7178                 if (r)
7179                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7180         }
7181
7182         r = radeon_ib_pool_init(rdev);
7183         if (r) {
7184                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7185                 return r;
7186         }
7187
7188         r = radeon_vm_manager_init(rdev);
7189         if (r) {
7190                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7191                 return r;
7192         }
7193
7194         r = dce6_audio_init(rdev);
7195         if (r)
7196                 return r;
7197
7198         return 0;
7199 }
7200
7201 /**
7202  * cik_resume - resume the asic to a functional state
7203  *
7204  * @rdev: radeon_device pointer
7205  *
7206  * Programs the asic to a functional state (CIK).
7207  * Called at resume.
7208  * Returns 0 for success, error for failure.
7209  */
7210 int cik_resume(struct radeon_device *rdev)
7211 {
7212         int r;
7213
7214         /* post card */
7215         atom_asic_init(rdev->mode_info.atom_context);
7216
7217         /* init golden registers */
7218         cik_init_golden_registers(rdev);
7219
7220         rdev->accel_working = true;
7221         r = cik_startup(rdev);
7222         if (r) {
7223                 DRM_ERROR("cik startup failed on resume\n");
7224                 rdev->accel_working = false;
7225                 return r;
7226         }
7227
7228         return r;
7229
7230 }
7231
7232 /**
7233  * cik_suspend - suspend the asic
7234  *
7235  * @rdev: radeon_device pointer
7236  *
7237  * Bring the chip into a state suitable for suspend (CIK).
7238  * Called at suspend.
7239  * Returns 0 for success.
7240  */
int cik_suspend(struct radeon_device *rdev)
{
	/* quiesce audio and the VM manager before halting the engines */
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP (gfx/compute) and sDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* shut down UVD and save its state for resume */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* disable power/clock gating before masking interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* disable writeback and tear down the GART mapping last */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7256
7257 /* Plan is to move initialization in that function and use
7258  * helper function so that radeon_device_init pretty much
7259  * do nothing more than calling asic specific function. This
7260  * should also allow to remove a bunch of callback function
7261  * like vram_info.
7262  */
7263 /**
7264  * cik_init - asic specific driver and hw init
7265  *
7266  * @rdev: radeon_device pointer
7267  *
7268  * Setup asic specific driver variables and program the hw
7269  * to a functional state (CIK).
7270  * Called at driver startup.
7271  * Returns 0 for success, errors for failure.
7272  */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message says "cayman" but this is the CIK init
		 * path — looks copied from the cayman code; confirm before
		 * changing the user-visible string. */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* gfx ring: 1MB; actual buffer allocation happens in cik_startup() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* first compute ring: 1MB plus a doorbell page for the MEC queue */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	/* second compute ring: same setup as the first */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	/* two sDMA rings: 256KB each */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	/* UVD is optional; only set up its ring if init succeeded */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	/* interrupt handler ring: 64KB */
	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		/* startup failure is not fatal for the driver load: tear down
		 * the acceleration blocks and continue without accel */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
7392
7393 /**
7394  * cik_fini - asic specific driver and hw fini
7395  *
7396  * @rdev: radeon_device pointer
7397  *
7398  * Tear down the asic specific driver variables and program the hw
7399  * to an idle state (CIK).
7400  * Called at driver unload.
7401  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down the command processors and sDMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable power/clock gating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	/* free interrupt, RLC and MEC resources */
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	/* writeback, VM manager, IB pool, KMS irq */
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD teardown */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	/* GART, VRAM scratch, GEM, fences, BO manager */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS copy made at init time */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7426
7427 /* display watermark setup */
7428 /**
7429  * dce8_line_buffer_adjust - Set up the line buffer
7430  *
7431  * @rdev: radeon_device pointer
7432  * @radeon_crtc: the selected display controller
7433  * @mode: the current display mode on the selected display
7434  * controller
7435  *
7436  * Setup up the line buffer allocation for
7437  * the selected display controller (CIK).
7438  * Returns the line buffer size in pixels.
7439  */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* tmp selects the LB memory config; buffer_alloc is the
		 * number of DMIF buffers to reserve for this pipe */
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			/* IGPs get fewer DMIF buffers than dGPUs */
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	/* request the DMIF buffer allocation and poll for completion */
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* return the LB size in pixels for the chosen config */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
7501
7502 /**
7503  * cik_get_number_of_dram_channels - get the number of dram channels
7504  *
7505  * @rdev: radeon_device pointer
7506  *
7507  * Look up the number of video ram channels (CIK).
7508  * Used for display watermark bandwidth calculations
7509  * Returns the number of dram channels
7510  */
7511 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7512 {
7513         u32 tmp = RREG32(MC_SHARED_CHMAP);
7514
7515         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7516         case 0:
7517         default:
7518                 return 1;
7519         case 1:
7520                 return 2;
7521         case 2:
7522                 return 4;
7523         case 3:
7524                 return 8;
7525         case 4:
7526                 return 3;
7527         case 5:
7528                 return 6;
7529         case 6:
7530                 return 10;
7531         case 7:
7532                 return 12;
7533         case 8:
7534                 return 16;
7535         }
7536 }
7537
/* Input parameters for the dce8 display watermark calculations. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7553
7554 /**
7555  * dce8_dram_bandwidth - get the dram bandwidth
7556  *
7557  * @wm: watermark calculation data
7558  *
7559  * Calculate the raw dram bandwidth (CIK).
7560  * Used for display watermark bandwidth calculations
7561  * Returns the dram bandwidth in MBytes/s
7562  */
7563 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7564 {
7565         /* Calculate raw DRAM Bandwidth */
7566         fixed20_12 dram_efficiency; /* 0.7 */
7567         fixed20_12 yclk, dram_channels, bandwidth;
7568         fixed20_12 a;
7569
7570         a.full = dfixed_const(1000);
7571         yclk.full = dfixed_const(wm->yclk);
7572         yclk.full = dfixed_div(yclk, a);
7573         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7574         a.full = dfixed_const(10);
7575         dram_efficiency.full = dfixed_const(7);
7576         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7577         bandwidth.full = dfixed_mul(dram_channels, yclk);
7578         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7579
7580         return dfixed_trunc(bandwidth);
7581 }
7582
7583 /**
7584  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7585  *
7586  * @wm: watermark calculation data
7587  *
7588  * Calculate the dram bandwidth used for display (CIK).
7589  * Used for display watermark bandwidth calculations
7590  * Returns the dram bandwidth for display in MBytes/s
7591  */
7592 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7593 {
7594         /* Calculate DRAM Bandwidth and the part allocated to display. */
7595         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7596         fixed20_12 yclk, dram_channels, bandwidth;
7597         fixed20_12 a;
7598
7599         a.full = dfixed_const(1000);
7600         yclk.full = dfixed_const(wm->yclk);
7601         yclk.full = dfixed_div(yclk, a);
7602         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7603         a.full = dfixed_const(10);
7604         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7605         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7606         bandwidth.full = dfixed_mul(dram_channels, yclk);
7607         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7608
7609         return dfixed_trunc(bandwidth);
7610 }
7611
7612 /**
7613  * dce8_data_return_bandwidth - get the data return bandwidth
7614  *
7615  * @wm: watermark calculation data
7616  *
7617  * Calculate the data return bandwidth used for display (CIK).
7618  * Used for display watermark bandwidth calculations
7619  * Returns the data return bandwidth in MBytes/s
7620  */
7621 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7622 {
7623         /* Calculate the display Data return Bandwidth */
7624         fixed20_12 return_efficiency; /* 0.8 */
7625         fixed20_12 sclk, bandwidth;
7626         fixed20_12 a;
7627
7628         a.full = dfixed_const(1000);
7629         sclk.full = dfixed_const(wm->sclk);
7630         sclk.full = dfixed_div(sclk, a);
7631         a.full = dfixed_const(10);
7632         return_efficiency.full = dfixed_const(8);
7633         return_efficiency.full = dfixed_div(return_efficiency, a);
7634         a.full = dfixed_const(32);
7635         bandwidth.full = dfixed_mul(a, sclk);
7636         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7637
7638         return dfixed_trunc(bandwidth);
7639 }
7640
7641 /**
7642  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7643  *
7644  * @wm: watermark calculation data
7645  *
7646  * Calculate the dmif bandwidth used for display (CIK).
7647  * Used for display watermark bandwidth calculations
7648  * Returns the dmif bandwidth in MBytes/s
7649  */
7650 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7651 {
7652         /* Calculate the DMIF Request Bandwidth */
7653         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7654         fixed20_12 disp_clk, bandwidth;
7655         fixed20_12 a, b;
7656
7657         a.full = dfixed_const(1000);
7658         disp_clk.full = dfixed_const(wm->disp_clk);
7659         disp_clk.full = dfixed_div(disp_clk, a);
7660         a.full = dfixed_const(32);
7661         b.full = dfixed_mul(a, disp_clk);
7662
7663         a.full = dfixed_const(10);
7664         disp_clk_request_efficiency.full = dfixed_const(8);
7665         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7666
7667         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7668
7669         return dfixed_trunc(bandwidth);
7670 }
7671
7672 /**
7673  * dce8_available_bandwidth - get the min available bandwidth
7674  *
7675  * @wm: watermark calculation data
7676  *
7677  * Calculate the min available bandwidth used for display (CIK).
7678  * Used for display watermark bandwidth calculations
7679  * Returns the min available bandwidth in MBytes/s
7680  */
7681 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7682 {
7683         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7684         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7685         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7686         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7687
7688         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7689 }
7690
7691 /**
7692  * dce8_average_bandwidth - get the average available bandwidth
7693  *
7694  * @wm: watermark calculation data
7695  *
7696  * Calculate the average available bandwidth used for display (CIK).
7697  * Used for display watermark bandwidth calculations
7698  * Returns the average available bandwidth in MBytes/s
7699  */
7700 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7701 {
7702         /* Calculate the display mode Average Bandwidth
7703          * DisplayMode should contain the source and destination dimensions,
7704          * timing, etc.
7705          */
7706         fixed20_12 bpp;
7707         fixed20_12 line_time;
7708         fixed20_12 src_width;
7709         fixed20_12 bandwidth;
7710         fixed20_12 a;
7711
7712         a.full = dfixed_const(1000);
7713         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7714         line_time.full = dfixed_div(line_time, a);
7715         bpp.full = dfixed_const(wm->bytes_per_pixel);
7716         src_width.full = dfixed_const(wm->src_width);
7717         bandwidth.full = dfixed_mul(src_width, bpp);
7718         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7719         bandwidth.full = dfixed_div(bandwidth, line_time);
7720
7721         return dfixed_trunc(bandwidth);
7722 }
7723
7724 /**
7725  * dce8_latency_watermark - get the latency watermark
7726  *
7727  * @wm: watermark calculation data
7728  *
7729  * Calculate the latency watermark (CIK).
7730  * Used for display watermark bandwidth calculations
7731  * Returns the latency watermark in ns
7732  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): these two divisions happen before the num_heads==0
	 * guard below and divide by available_bandwidth — if the bandwidth
	 * helpers can return 0 (e.g. yclk/sclk of 0) this is a divide by
	 * zero; confirm the callers guarantee nonzero clocks. */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* more source lines are needed per destination line when
	 * downscaling (vsc > 1), with many scaler taps, or interlaced */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = available bandwidth per head */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	/* tmp = min of the two fill-rate limits */
	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk/1000 * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is capped by the display clock */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* line_fill_time = bytes per dst line / (lb_fill_bw / 1000) */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* extend the watermark if the LB can't be filled within the
	 * active display time */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
7795
7796 /**
7797  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7798  * average and available dram bandwidth
7799  *
7800  * @wm: watermark calculation data
7801  *
7802  * Check if the display average bandwidth fits in the display
7803  * dram bandwidth (CIK).
7804  * Used for display watermark bandwidth calculations
7805  * Returns true if the display fits, false if not.
7806  */
7807 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7808 {
7809         if (dce8_average_bandwidth(wm) <=
7810             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7811                 return true;
7812         else
7813                 return false;
7814 }
7815
7816 /**
7817  * dce8_average_bandwidth_vs_available_bandwidth - check
7818  * average and available bandwidth
7819  *
7820  * @wm: watermark calculation data
7821  *
7822  * Check if the display average bandwidth fits in the display
7823  * available bandwidth (CIK).
7824  * Used for display watermark bandwidth calculations
7825  * Returns true if the display fits, false if not.
7826  */
7827 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7828 {
7829         if (dce8_average_bandwidth(wm) <=
7830             (dce8_available_bandwidth(wm) / wm->num_heads))
7831                 return true;
7832         else
7833                 return false;
7834 }
7835
7836 /**
7837  * dce8_check_latency_hiding - check latency hiding
7838  *
7839  * @wm: watermark calculation data
7840  *
7841  * Check latency hiding (CIK).
7842  * Used for display watermark bandwidth calculations
7843  * Returns true if the display fits, false if not.
7844  */
7845 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7846 {
7847         u32 lb_partitions = wm->lb_size / wm->src_width;
7848         u32 line_time = wm->active_time + wm->blank_time;
7849         u32 latency_tolerant_lines;
7850         u32 latency_hiding;
7851         fixed20_12 a;
7852
7853         a.full = dfixed_const(1);
7854         if (wm->vsc.full > a.full)
7855                 latency_tolerant_lines = 1;
7856         else {
7857                 if (lb_partitions <= (wm->vtaps + 1))
7858                         latency_tolerant_lines = 1;
7859                 else
7860                         latency_tolerant_lines = 2;
7861         }
7862
7863         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7864
7865         if (dce8_latency_watermark(wm) <= latency_hiding)
7866                 return true;
7867         else
7868                 return false;
7869 }
7870
7871 /**
7872  * dce8_program_watermarks - program display watermarks
7873  *
7874  * @rdev: radeon_device pointer
7875  * @radeon_crtc: the selected display controller
7876  * @lb_size: line buffer size
7877  * @num_heads: number of display controllers in use
7878  *
7879  * Calculate and program the display watermarks for the
7880  * selected display controller (CIK).
7881  */
static void dce8_program_watermarks(struct radeon_device *rdev,
                                    struct radeon_crtc *radeon_crtc,
                                    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line time in ns, clamped to the 16-bit register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* dpm: query the high (non-low) clock levels */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* dpm: query the low clock levels */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
8003
8004 /**
8005  * dce8_bandwidth_update - program display watermarks
8006  *
8007  * @rdev: radeon_device pointer
8008  *
8009  * Calculate and program the display watermarks and line
8010  * buffer allocation (CIK).
8011  */
8012 void dce8_bandwidth_update(struct radeon_device *rdev)
8013 {
8014         struct drm_display_mode *mode = NULL;
8015         u32 num_heads = 0, lb_size;
8016         int i;
8017
8018         radeon_update_display_priority(rdev);
8019
8020         for (i = 0; i < rdev->num_crtc; i++) {
8021                 if (rdev->mode_info.crtcs[i]->base.enabled)
8022                         num_heads++;
8023         }
8024         for (i = 0; i < rdev->num_crtc; i++) {
8025                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8026                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8027                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8028         }
8029 }
8030
8031 /**
8032  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8033  *
8034  * @rdev: radeon_device pointer
8035  *
8036  * Fetches a GPU clock counter snapshot (SI).
8037  * Returns the 64 bit clock counter snapshot.
8038  */
8039 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8040 {
8041         uint64_t clock;
8042
8043         mutex_lock(&rdev->gpu_clock_mutex);
8044         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8045         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8046                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8047         mutex_unlock(&rdev->gpu_clock_mutex);
8048         return clock;
8049 }
8050
8051 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8052                               u32 cntl_reg, u32 status_reg)
8053 {
8054         int r, i;
8055         struct atom_clock_dividers dividers;
8056         uint32_t tmp;
8057
8058         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8059                                            clock, false, &dividers);
8060         if (r)
8061                 return r;
8062
8063         tmp = RREG32_SMC(cntl_reg);
8064         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8065         tmp |= dividers.post_divider;
8066         WREG32_SMC(cntl_reg, tmp);
8067
8068         for (i = 0; i < 100; i++) {
8069                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8070                         break;
8071                 mdelay(10);
8072         }
8073         if (i == 100)
8074                 return -ETIMEDOUT;
8075
8076         return 0;
8077 }
8078
8079 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8080 {
8081         int r = 0;
8082
8083         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8084         if (r)
8085                 return r;
8086
8087         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8088         return r;
8089 }
8090
/**
 * cik_pcie_gen3_enable - retrain the PCIE link to gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * If the platform supports it (and radeon.pcie_gen2 is not 0), bump
 * the PCIE link to the highest supported data rate: run the gen3
 * equalization retry sequence if gen3 is available, then request the
 * target link speed in Link Control 2 and trigger a speed change.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* radeon.pcie_gen2=0 disables all link speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIE link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* query the speeds the platform/root port can do */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* bail out early if the link is already at the target rate
	 * (data rate 2 == gen3, 1 == gen2) */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIE capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hardware autonomous width disable)
			 * state of both ends so it can be restored below */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* if the link trained below its max width, ask for a
			 * renegotiation back up to the full width */
			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* gen3 equalization retry loop: up to 10 attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* re-sample link control state of both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore enter-compliance (bit 4) and
				 * transmit-margin (bits 9-11) fields */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into Link Control 2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8247
/**
 * cik_program_aspm - configure PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Enables L0s/L1 link power states, PLL powerdown in L1 and CLKREQ
 * based clocking where supported, unless disabled via radeon.aspm=0.
 * The disable_* locals act as compile-time policy knobs; all features
 * are enabled by default here.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon.aspm=0 disables all of this */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* program L0s/L1 inactivity timeouts per the policy knobs above */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF to power down the PLLs while in L1;
			 * same programming for both pad blocks (PB0/PB1) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ is only usable if the root port advertises
			 * clock power management in its link capabilities */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the various SMC-visible clocks to
				 * sources that survive the reference clock
				 * being gated by CLKREQ */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is saturated and the link is reversed in both
		 * directions, drop the L0s inactivity timeout again */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}