/* drivers/gpu/drm/radeon/si.c */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
/*
 * Firmware images required for each Southern Islands (SI) ASIC variant.
 * MODULE_FIRMWARE() records the dependency so userspace tooling
 * (e.g. initramfs generators) can bundle the files.  Each ASIC needs
 * pfp/me/ce (graphics CP), mc (memory controller), rlc (run list
 * controller) and smc (power management) microcode.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

/* Forward declarations for helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/*
 * Shared helpers reused from the sumo/r600/evergreen code (defined in
 * their respective .c files; declared extern here rather than in a
 * header).
 */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_dma_vm_set_page(struct radeon_device *rdev,
                               struct radeon_ib *ib,
                               uint64_t pe,
                               uint64_t addr, unsigned count,
                               uint32_t incr, uint32_t flags);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                         bool enable);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
91
92 static const u32 verde_rlc_save_restore_register_list[] =
93 {
94         (0x8000 << 16) | (0x98f4 >> 2),
95         0x00000000,
96         (0x8040 << 16) | (0x98f4 >> 2),
97         0x00000000,
98         (0x8000 << 16) | (0xe80 >> 2),
99         0x00000000,
100         (0x8040 << 16) | (0xe80 >> 2),
101         0x00000000,
102         (0x8000 << 16) | (0x89bc >> 2),
103         0x00000000,
104         (0x8040 << 16) | (0x89bc >> 2),
105         0x00000000,
106         (0x8000 << 16) | (0x8c1c >> 2),
107         0x00000000,
108         (0x8040 << 16) | (0x8c1c >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x98f0 >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0xe7c >> 2),
113         0x00000000,
114         (0x8000 << 16) | (0x9148 >> 2),
115         0x00000000,
116         (0x8040 << 16) | (0x9148 >> 2),
117         0x00000000,
118         (0x9c00 << 16) | (0x9150 >> 2),
119         0x00000000,
120         (0x9c00 << 16) | (0x897c >> 2),
121         0x00000000,
122         (0x9c00 << 16) | (0x8d8c >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0xac54 >> 2),
125         0X00000000,
126         0x3,
127         (0x9c00 << 16) | (0x98f8 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9910 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x9914 >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9918 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x991c >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9920 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x9924 >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9928 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x992c >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9930 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x9934 >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9938 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x993c >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9940 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x9944 >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9948 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x994c >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9950 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x9954 >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9958 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x995c >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9960 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x9964 >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9968 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x996c >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9970 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9974 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x9978 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x997c >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x9980 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x9984 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x9988 >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x998c >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x8c00 >> 2),
194         0x00000000,
195         (0x9c00 << 16) | (0x8c14 >> 2),
196         0x00000000,
197         (0x9c00 << 16) | (0x8c04 >> 2),
198         0x00000000,
199         (0x9c00 << 16) | (0x8c08 >> 2),
200         0x00000000,
201         (0x8000 << 16) | (0x9b7c >> 2),
202         0x00000000,
203         (0x8040 << 16) | (0x9b7c >> 2),
204         0x00000000,
205         (0x8000 << 16) | (0xe84 >> 2),
206         0x00000000,
207         (0x8040 << 16) | (0xe84 >> 2),
208         0x00000000,
209         (0x8000 << 16) | (0x89c0 >> 2),
210         0x00000000,
211         (0x8040 << 16) | (0x89c0 >> 2),
212         0x00000000,
213         (0x8000 << 16) | (0x914c >> 2),
214         0x00000000,
215         (0x8040 << 16) | (0x914c >> 2),
216         0x00000000,
217         (0x8000 << 16) | (0x8c20 >> 2),
218         0x00000000,
219         (0x8040 << 16) | (0x8c20 >> 2),
220         0x00000000,
221         (0x8000 << 16) | (0x9354 >> 2),
222         0x00000000,
223         (0x8040 << 16) | (0x9354 >> 2),
224         0x00000000,
225         (0x9c00 << 16) | (0x9060 >> 2),
226         0x00000000,
227         (0x9c00 << 16) | (0x9364 >> 2),
228         0x00000000,
229         (0x9c00 << 16) | (0x9100 >> 2),
230         0x00000000,
231         (0x9c00 << 16) | (0x913c >> 2),
232         0x00000000,
233         (0x8000 << 16) | (0x90e0 >> 2),
234         0x00000000,
235         (0x8000 << 16) | (0x90e4 >> 2),
236         0x00000000,
237         (0x8000 << 16) | (0x90e8 >> 2),
238         0x00000000,
239         (0x8040 << 16) | (0x90e0 >> 2),
240         0x00000000,
241         (0x8040 << 16) | (0x90e4 >> 2),
242         0x00000000,
243         (0x8040 << 16) | (0x90e8 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8bcc >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x8b24 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x88c4 >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x8e50 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0x8c0c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0x8e58 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0x8e5c >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0x9508 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0x950c >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x9494 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0xac0c >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0xac10 >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0xac14 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0xae00 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0xac08 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x88d4 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x88c8 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x88cc >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x89b0 >> 2),
282         0x00000000,
283         (0x9c00 << 16) | (0x8b10 >> 2),
284         0x00000000,
285         (0x9c00 << 16) | (0x8a14 >> 2),
286         0x00000000,
287         (0x9c00 << 16) | (0x9830 >> 2),
288         0x00000000,
289         (0x9c00 << 16) | (0x9834 >> 2),
290         0x00000000,
291         (0x9c00 << 16) | (0x9838 >> 2),
292         0x00000000,
293         (0x9c00 << 16) | (0x9a10 >> 2),
294         0x00000000,
295         (0x8000 << 16) | (0x9870 >> 2),
296         0x00000000,
297         (0x8000 << 16) | (0x9874 >> 2),
298         0x00000000,
299         (0x8001 << 16) | (0x9870 >> 2),
300         0x00000000,
301         (0x8001 << 16) | (0x9874 >> 2),
302         0x00000000,
303         (0x8040 << 16) | (0x9870 >> 2),
304         0x00000000,
305         (0x8040 << 16) | (0x9874 >> 2),
306         0x00000000,
307         (0x8041 << 16) | (0x9870 >> 2),
308         0x00000000,
309         (0x8041 << 16) | (0x9874 >> 2),
310         0x00000000,
311         0x00000000
312 };
313
/*
 * Golden (known-good) RLC register settings for Tahiti.
 * Triplets of { register offset, and-mask, value }; presumably applied
 * via radeon_program_register_sequence() during golden-register init —
 * TODO confirm against the consumer further down the file.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4,
        0xf4a8, 0xffffffff, 0x00000000
};
323
/*
 * Golden register settings for Tahiti.
 * Triplets of { register offset, and-mask, value } — same layout as the
 * other *_golden_* tables in this file.
 */
static const u32 tahiti_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x277c, 0x00000003, 0x000007ff,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x00000200, 0x000002fb,
        0xac10, 0xffffffff, 0x0000543b,
        0xac0c, 0xffffffff, 0xa9210876,
        0x88d0, 0xffffffff, 0x000fff40,
        0x88d4, 0x0000001f, 0x00000010,
        0x1410, 0x20000000, 0x20fffed8,
        0x15c0, 0x000c0fc0, 0x000c0400
};
358
/*
 * Second golden-register table for Tahiti (applied in addition to
 * tahiti_golden_registers).  Same { offset, and-mask, value } layout.
 */
static const u32 tahiti_golden_registers2[] =
{
        0xc64, 0x00000001, 0x00000001
};
363
/*
 * Golden RLC register settings for Pitcairn.
 * Triplets of { register offset, and-mask, value }.
 */
static const u32 pitcairn_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601004,
        0xc47c, 0xffffffff, 0x10102020,
        0xc488, 0xffffffff, 0x01000020,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000a4
};
372
/*
 * Golden register settings for Pitcairn.
 * Triplets of { register offset, and-mask, value }.
 */
static const u32 pitcairn_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f7,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x32761054,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
403
/*
 * Golden RLC register settings for Verde.
 * Triplets of { register offset, and-mask, value }.
 */
static const u32 verde_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x033f1005,
        0xc47c, 0xffffffff, 0x10808020,
        0xc488, 0xffffffff, 0x00800008,
        0xc314, 0xffffffff, 0x00001000,
        0xc30c, 0xffffffff, 0x80010014
};
412
/*
 * Golden register settings for Verde.
 * Triplets of { register offset, and-mask, value }.
 *
 * NOTE(review): several triplets appear two or three times in a row
 * (0xd030, 0xd830, 0x2ae4, 0x240c, 0x8a14, 0x28350, ...).  The
 * duplicates look unintentional but are preserved here; assuming the
 * consumer applies entries sequentially, re-writing the same value is
 * harmless — TODO confirm and consider deduplicating upstream.
 */
static const u32 verde_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
468
/*
 * Golden RLC register settings for Oland.
 * Triplets of { register offset, and-mask, value }.
 */
static const u32 oland_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4
};
477
/*
 * Golden register settings for Oland.
 * Triplets of { register offset, and-mask, value }.
 */
static const u32 oland_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
508
/*
 * Golden register settings for Hainan.
 * Triplets of { register offset, and-mask, value }.
 */
static const u32 hainan_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xd0c0, 0xff000fff, 0x00000100,
        0xd030, 0x000300c0, 0x00800040,
        0xd8c0, 0xff000fff, 0x00000100,
        0xd830, 0x000300c0, 0x00800040,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000000,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x03e00000, 0x03600000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f1,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};
537
/*
 * Second golden-register table for Hainan (applied in addition to
 * hainan_golden_registers).  Same { offset, and-mask, value } layout.
 */
static const u32 hainan_golden_registers2[] =
{
        0x98f8, 0xffffffff, 0x02010001
};
542
/*
 * Clock-gating (MGCG/CGCG) init sequence for Tahiti.
 * Triplets of { register offset, and-mask, value }, applied in order.
 * Values are opaque hardware magic supplied by AMD.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x91d8, 0xffffffff, 0x00070006,
        0x91dc, 0xffffffff, 0x00090008,
        0x91e0, 0xffffffff, 0x0000000c,
        0x91e4, 0xffffffff, 0x000b000a,
        0x91e8, 0xffffffff, 0x000e000d,
        0x91ec, 0xffffffff, 0x00080007,
        0x91f0, 0xffffffff, 0x000a0009,
        0x91f4, 0xffffffff, 0x0000000d,
        0x91f8, 0xffffffff, 0x000c000b,
        0x91fc, 0xffffffff, 0x000f000e,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9264, 0xffffffff, 0x000e000d,
        0x9268, 0xffffffff, 0x0010000f,
        0x926c, 0xffffffff, 0x00000013,
        0x9270, 0xffffffff, 0x00120011,
        0x9274, 0xffffffff, 0x00150014,
        0x9278, 0xffffffff, 0x000f000e,
        0x927c, 0xffffffff, 0x00110010,
        0x9280, 0xffffffff, 0x00000014,
        0x9284, 0xffffffff, 0x00130012,
        0x9288, 0xffffffff, 0x00160015,
        0x928c, 0xffffffff, 0x0010000f,
        0x9290, 0xffffffff, 0x00120011,
        0x9294, 0xffffffff, 0x00000015,
        0x9298, 0xffffffff, 0x00140013,
        0x929c, 0xffffffff, 0x00170016,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
672
/*
 * Clock-gating (MGCG/CGCG) init sequence for Pitcairn.
 * Triplets of { register offset, and-mask, value }, applied in order;
 * a shorter variant of the Tahiti table (fewer shader-engine entries).
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
770
/*
 * MGCG/CGCG (clock gating) init sequence for Verde, consumed by
 * si_init_golden_registers() via radeon_program_register_sequence().
 * Rows are {register offset, mask, value} triplets -- presumably
 * "and-mask then or-value"; confirm against
 * radeon_program_register_sequence().
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
870
/*
 * MGCG/CGCG (clock gating) init sequence for Oland, consumed by
 * si_init_golden_registers() via radeon_program_register_sequence().
 * Rows are {register offset, mask, value} triplets.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
950
/*
 * MGCG/CGCG (clock gating) init sequence for Hainan, consumed by
 * si_init_golden_registers() via radeon_program_register_sequence().
 * Rows are {register offset, mask, value} triplets.  Same shape as the
 * Oland table minus a few entries (no 0x102c, 0x55e4, 0x55e8 rows).
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1027
1028 static u32 verde_pg_init[] =
1029 {
1030         0x353c, 0xffffffff, 0x40000,
1031         0x3538, 0xffffffff, 0x200010ff,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x7007,
1038         0x3538, 0xffffffff, 0x300010ff,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x400000,
1045         0x3538, 0xffffffff, 0x100010ff,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x120200,
1052         0x3538, 0xffffffff, 0x500010ff,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x1e1e16,
1059         0x3538, 0xffffffff, 0x600010ff,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x171f1e,
1066         0x3538, 0xffffffff, 0x700010ff,
1067         0x353c, 0xffffffff, 0x0,
1068         0x353c, 0xffffffff, 0x0,
1069         0x353c, 0xffffffff, 0x0,
1070         0x353c, 0xffffffff, 0x0,
1071         0x353c, 0xffffffff, 0x0,
1072         0x353c, 0xffffffff, 0x0,
1073         0x3538, 0xffffffff, 0x9ff,
1074         0x3500, 0xffffffff, 0x0,
1075         0x3504, 0xffffffff, 0x10000800,
1076         0x3504, 0xffffffff, 0xf,
1077         0x3504, 0xffffffff, 0xf,
1078         0x3500, 0xffffffff, 0x4,
1079         0x3504, 0xffffffff, 0x1000051e,
1080         0x3504, 0xffffffff, 0xffff,
1081         0x3504, 0xffffffff, 0xffff,
1082         0x3500, 0xffffffff, 0x8,
1083         0x3504, 0xffffffff, 0x80500,
1084         0x3500, 0xffffffff, 0x12,
1085         0x3504, 0xffffffff, 0x9050c,
1086         0x3500, 0xffffffff, 0x1d,
1087         0x3504, 0xffffffff, 0xb052c,
1088         0x3500, 0xffffffff, 0x2a,
1089         0x3504, 0xffffffff, 0x1053e,
1090         0x3500, 0xffffffff, 0x2d,
1091         0x3504, 0xffffffff, 0x10546,
1092         0x3500, 0xffffffff, 0x30,
1093         0x3504, 0xffffffff, 0xa054e,
1094         0x3500, 0xffffffff, 0x3c,
1095         0x3504, 0xffffffff, 0x1055f,
1096         0x3500, 0xffffffff, 0x3f,
1097         0x3504, 0xffffffff, 0x10567,
1098         0x3500, 0xffffffff, 0x42,
1099         0x3504, 0xffffffff, 0x1056f,
1100         0x3500, 0xffffffff, 0x45,
1101         0x3504, 0xffffffff, 0x10572,
1102         0x3500, 0xffffffff, 0x48,
1103         0x3504, 0xffffffff, 0x20575,
1104         0x3500, 0xffffffff, 0x4c,
1105         0x3504, 0xffffffff, 0x190801,
1106         0x3500, 0xffffffff, 0x67,
1107         0x3504, 0xffffffff, 0x1082a,
1108         0x3500, 0xffffffff, 0x6a,
1109         0x3504, 0xffffffff, 0x1b082d,
1110         0x3500, 0xffffffff, 0x87,
1111         0x3504, 0xffffffff, 0x310851,
1112         0x3500, 0xffffffff, 0xba,
1113         0x3504, 0xffffffff, 0x891,
1114         0x3500, 0xffffffff, 0xbc,
1115         0x3504, 0xffffffff, 0x893,
1116         0x3500, 0xffffffff, 0xbe,
1117         0x3504, 0xffffffff, 0x20895,
1118         0x3500, 0xffffffff, 0xc2,
1119         0x3504, 0xffffffff, 0x20899,
1120         0x3500, 0xffffffff, 0xc6,
1121         0x3504, 0xffffffff, 0x2089d,
1122         0x3500, 0xffffffff, 0xca,
1123         0x3504, 0xffffffff, 0x8a1,
1124         0x3500, 0xffffffff, 0xcc,
1125         0x3504, 0xffffffff, 0x8a3,
1126         0x3500, 0xffffffff, 0xce,
1127         0x3504, 0xffffffff, 0x308a5,
1128         0x3500, 0xffffffff, 0xd3,
1129         0x3504, 0xffffffff, 0x6d08cd,
1130         0x3500, 0xffffffff, 0x142,
1131         0x3504, 0xffffffff, 0x2000095a,
1132         0x3504, 0xffffffff, 0x1,
1133         0x3500, 0xffffffff, 0x144,
1134         0x3504, 0xffffffff, 0x301f095b,
1135         0x3500, 0xffffffff, 0x165,
1136         0x3504, 0xffffffff, 0xc094d,
1137         0x3500, 0xffffffff, 0x173,
1138         0x3504, 0xffffffff, 0xf096d,
1139         0x3500, 0xffffffff, 0x184,
1140         0x3504, 0xffffffff, 0x15097f,
1141         0x3500, 0xffffffff, 0x19b,
1142         0x3504, 0xffffffff, 0xc0998,
1143         0x3500, 0xffffffff, 0x1a9,
1144         0x3504, 0xffffffff, 0x409a7,
1145         0x3500, 0xffffffff, 0x1af,
1146         0x3504, 0xffffffff, 0xcdc,
1147         0x3500, 0xffffffff, 0x1b1,
1148         0x3504, 0xffffffff, 0x800,
1149         0x3508, 0xffffffff, 0x6c9b2000,
1150         0x3510, 0xfc00, 0x2000,
1151         0x3544, 0xffffffff, 0xfc0,
1152         0x28d4, 0x00000100, 0x100
1153 };
1154
1155 static void si_init_golden_registers(struct radeon_device *rdev)
1156 {
1157         switch (rdev->family) {
1158         case CHIP_TAHITI:
1159                 radeon_program_register_sequence(rdev,
1160                                                  tahiti_golden_registers,
1161                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1162                 radeon_program_register_sequence(rdev,
1163                                                  tahiti_golden_rlc_registers,
1164                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1165                 radeon_program_register_sequence(rdev,
1166                                                  tahiti_mgcg_cgcg_init,
1167                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1168                 radeon_program_register_sequence(rdev,
1169                                                  tahiti_golden_registers2,
1170                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1171                 break;
1172         case CHIP_PITCAIRN:
1173                 radeon_program_register_sequence(rdev,
1174                                                  pitcairn_golden_registers,
1175                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1176                 radeon_program_register_sequence(rdev,
1177                                                  pitcairn_golden_rlc_registers,
1178                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1179                 radeon_program_register_sequence(rdev,
1180                                                  pitcairn_mgcg_cgcg_init,
1181                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1182                 break;
1183         case CHIP_VERDE:
1184                 radeon_program_register_sequence(rdev,
1185                                                  verde_golden_registers,
1186                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1187                 radeon_program_register_sequence(rdev,
1188                                                  verde_golden_rlc_registers,
1189                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1190                 radeon_program_register_sequence(rdev,
1191                                                  verde_mgcg_cgcg_init,
1192                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1193                 radeon_program_register_sequence(rdev,
1194                                                  verde_pg_init,
1195                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1196                 break;
1197         case CHIP_OLAND:
1198                 radeon_program_register_sequence(rdev,
1199                                                  oland_golden_registers,
1200                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1201                 radeon_program_register_sequence(rdev,
1202                                                  oland_golden_rlc_registers,
1203                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1204                 radeon_program_register_sequence(rdev,
1205                                                  oland_mgcg_cgcg_init,
1206                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1207                 break;
1208         case CHIP_HAINAN:
1209                 radeon_program_register_sequence(rdev,
1210                                                  hainan_golden_registers,
1211                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1212                 radeon_program_register_sequence(rdev,
1213                                                  hainan_golden_registers2,
1214                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1215                 radeon_program_register_sequence(rdev,
1216                                                  hainan_mgcg_cgcg_init,
1217                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1218                 break;
1219         default:
1220                 break;
1221         }
1222 }
1223
/*
 * TCLK is returned by si_get_xclk() when the xclk mux selects TCLK.
 * NOTE(review): values are presumably in 10 kHz units (100 MHz bus clock,
 * 10 MHz TCLK) like other radeon clock values -- TODO confirm.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1226
1227 /**
1228  * si_get_xclk - get the xclk
1229  *
1230  * @rdev: radeon_device pointer
1231  *
1232  * Returns the reference clock used by the gfx engine
1233  * (SI).
1234  */
1235 u32 si_get_xclk(struct radeon_device *rdev)
1236 {
1237         u32 reference_clock = rdev->clock.spll.reference_freq;
1238         u32 tmp;
1239
1240         tmp = RREG32(CG_CLKPIN_CNTL_2);
1241         if (tmp & MUX_TCLK_TO_XCLK)
1242                 return TCLK;
1243
1244         tmp = RREG32(CG_CLKPIN_CNTL);
1245         if (tmp & XTALIN_DIVIDE)
1246                 return reference_clock / 4;
1247
1248         return reference_clock;
1249 }
1250
1251 /* get temperature in millidegrees */
1252 int si_get_temp(struct radeon_device *rdev)
1253 {
1254         u32 temp;
1255         int actual_temp = 0;
1256
1257         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1258                 CTF_TEMP_SHIFT;
1259
1260         if (temp & 0x200)
1261                 actual_temp = 255;
1262         else
1263                 actual_temp = temp & 0x1ff;
1264
1265         actual_temp = (actual_temp * 1000);
1266
1267         return actual_temp;
1268 }
1269
/* number of {MC_SEQ_IO_DEBUG index, data} pairs in each *_io_mc_regs table */
#define TAHITI_IO_MC_REGS_SIZE 36
1271
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Tahiti, written by
 * si_mc_load_microcode() before the MC ucode is streamed in.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1310
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Pitcairn, written by
 * si_mc_load_microcode().  Identical to the Tahiti table except for the
 * final (0x9f) entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1349
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Verde, written by
 * si_mc_load_microcode().  Identical to the Tahiti table except for the
 * final (0x9f) entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1388
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Oland, written by
 * si_mc_load_microcode().  Identical to the Tahiti table except for the
 * final (0x9f) entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1427
/*
 * MC_SEQ_IO_DEBUG {index, data} pairs for Hainan, written by
 * si_mc_load_microcode().  Identical to the Tahiti table except for the
 * final (0x9f) entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1466
1467 /* ucode loading */
1468 static int si_mc_load_microcode(struct radeon_device *rdev)
1469 {
1470         const __be32 *fw_data;
1471         u32 running, blackout = 0;
1472         u32 *io_mc_regs;
1473         int i, ucode_size, regs_size;
1474
1475         if (!rdev->mc_fw)
1476                 return -EINVAL;
1477
1478         switch (rdev->family) {
1479         case CHIP_TAHITI:
1480                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1481                 ucode_size = SI_MC_UCODE_SIZE;
1482                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1483                 break;
1484         case CHIP_PITCAIRN:
1485                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1486                 ucode_size = SI_MC_UCODE_SIZE;
1487                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1488                 break;
1489         case CHIP_VERDE:
1490         default:
1491                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1492                 ucode_size = SI_MC_UCODE_SIZE;
1493                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1494                 break;
1495         case CHIP_OLAND:
1496                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1497                 ucode_size = OLAND_MC_UCODE_SIZE;
1498                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1499                 break;
1500         case CHIP_HAINAN:
1501                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1502                 ucode_size = OLAND_MC_UCODE_SIZE;
1503                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1504                 break;
1505         }
1506
1507         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1508
1509         if (running == 0) {
1510                 if (running) {
1511                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1512                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1513                 }
1514
1515                 /* reset the engine and set to writable */
1516                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1517                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1518
1519                 /* load mc io regs */
1520                 for (i = 0; i < regs_size; i++) {
1521                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1522                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1523                 }
1524                 /* load the MC ucode */
1525                 fw_data = (const __be32 *)rdev->mc_fw->data;
1526                 for (i = 0; i < ucode_size; i++)
1527                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1528
1529                 /* put the engine back into the active state */
1530                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1532                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1533
1534                 /* wait for training to complete */
1535                 for (i = 0; i < rdev->usec_timeout; i++) {
1536                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1537                                 break;
1538                         udelay(1);
1539                 }
1540                 for (i = 0; i < rdev->usec_timeout; i++) {
1541                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1542                                 break;
1543                         udelay(1);
1544                 }
1545
1546                 if (running)
1547                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1548         }
1549
1550         return 0;
1551 }
1552
1553 static int si_init_microcode(struct radeon_device *rdev)
1554 {
1555         const char *chip_name;
1556         const char *rlc_chip_name;
1557         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1558         size_t smc_req_size;
1559         char fw_name[30];
1560         int err;
1561
1562         DRM_DEBUG("\n");
1563
1564         switch (rdev->family) {
1565         case CHIP_TAHITI:
1566                 chip_name = "TAHITI";
1567                 rlc_chip_name = "TAHITI";
1568                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1569                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1570                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1571                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1572                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1573                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1574                 break;
1575         case CHIP_PITCAIRN:
1576                 chip_name = "PITCAIRN";
1577                 rlc_chip_name = "PITCAIRN";
1578                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1579                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1580                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1581                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1582                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1583                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1584                 break;
1585         case CHIP_VERDE:
1586                 chip_name = "VERDE";
1587                 rlc_chip_name = "VERDE";
1588                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1589                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1590                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1591                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1592                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1593                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1594                 break;
1595         case CHIP_OLAND:
1596                 chip_name = "OLAND";
1597                 rlc_chip_name = "OLAND";
1598                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1599                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1600                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1601                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1602                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1603                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1604                 break;
1605         case CHIP_HAINAN:
1606                 chip_name = "HAINAN";
1607                 rlc_chip_name = "HAINAN";
1608                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1609                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1610                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1611                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1612                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1613                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1614                 break;
1615         default: BUG();
1616         }
1617
1618         DRM_INFO("Loading %s Microcode\n", chip_name);
1619
1620         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1621         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1622         if (err)
1623                 goto out;
1624         if (rdev->pfp_fw->size != pfp_req_size) {
1625                 printk(KERN_ERR
1626                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627                        rdev->pfp_fw->size, fw_name);
1628                 err = -EINVAL;
1629                 goto out;
1630         }
1631
1632         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1633         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1634         if (err)
1635                 goto out;
1636         if (rdev->me_fw->size != me_req_size) {
1637                 printk(KERN_ERR
1638                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1639                        rdev->me_fw->size, fw_name);
1640                 err = -EINVAL;
1641         }
1642
1643         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1644         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1645         if (err)
1646                 goto out;
1647         if (rdev->ce_fw->size != ce_req_size) {
1648                 printk(KERN_ERR
1649                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1650                        rdev->ce_fw->size, fw_name);
1651                 err = -EINVAL;
1652         }
1653
1654         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1655         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1656         if (err)
1657                 goto out;
1658         if (rdev->rlc_fw->size != rlc_req_size) {
1659                 printk(KERN_ERR
1660                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1661                        rdev->rlc_fw->size, fw_name);
1662                 err = -EINVAL;
1663         }
1664
1665         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1666         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1667         if (err)
1668                 goto out;
1669         if (rdev->mc_fw->size != mc_req_size) {
1670                 printk(KERN_ERR
1671                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1672                        rdev->mc_fw->size, fw_name);
1673                 err = -EINVAL;
1674         }
1675
1676         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1677         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1678         if (err) {
1679                 printk(KERN_ERR
1680                        "smc: error loading firmware \"%s\"\n",
1681                        fw_name);
1682                 release_firmware(rdev->smc_fw);
1683                 rdev->smc_fw = NULL;
1684         } else if (rdev->smc_fw->size != smc_req_size) {
1685                 printk(KERN_ERR
1686                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1687                        rdev->smc_fw->size, fw_name);
1688                 err = -EINVAL;
1689         }
1690
1691 out:
1692         if (err) {
1693                 if (err != -EINVAL)
1694                         printk(KERN_ERR
1695                                "si_cp: Failed to load firmware \"%s\"\n",
1696                                fw_name);
1697                 release_firmware(rdev->pfp_fw);
1698                 rdev->pfp_fw = NULL;
1699                 release_firmware(rdev->me_fw);
1700                 rdev->me_fw = NULL;
1701                 release_firmware(rdev->ce_fw);
1702                 rdev->ce_fw = NULL;
1703                 release_firmware(rdev->rlc_fw);
1704                 rdev->rlc_fw = NULL;
1705                 release_firmware(rdev->mc_fw);
1706                 rdev->mc_fw = NULL;
1707                 release_firmware(rdev->smc_fw);
1708                 rdev->smc_fw = NULL;
1709         }
1710         return err;
1711 }
1712
1713 /* watermark setup */
1714 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1715                                    struct radeon_crtc *radeon_crtc,
1716                                    struct drm_display_mode *mode,
1717                                    struct drm_display_mode *other_mode)
1718 {
1719         u32 tmp, buffer_alloc, i;
1720         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1721         /*
1722          * Line Buffer Setup
1723          * There are 3 line buffers, each one shared by 2 display controllers.
1724          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1725          * the display controllers.  The paritioning is done via one of four
1726          * preset allocations specified in bits 21:20:
1727          *  0 - half lb
1728          *  2 - whole lb, other crtc must be disabled
1729          */
1730         /* this can get tricky if we have two large displays on a paired group
1731          * of crtcs.  Ideally for multiple large displays we'd assign them to
1732          * non-linked crtcs for maximum line buffer allocation.
1733          */
1734         if (radeon_crtc->base.enabled && mode) {
1735                 if (other_mode) {
1736                         tmp = 0; /* 1/2 */
1737                         buffer_alloc = 1;
1738                 } else {
1739                         tmp = 2; /* whole */
1740                         buffer_alloc = 2;
1741                 }
1742         } else {
1743                 tmp = 0;
1744                 buffer_alloc = 0;
1745         }
1746
1747         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1748                DC_LB_MEMORY_CONFIG(tmp));
1749
1750         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1751                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1752         for (i = 0; i < rdev->usec_timeout; i++) {
1753                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1754                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1755                         break;
1756                 udelay(1);
1757         }
1758
1759         if (radeon_crtc->base.enabled && mode) {
1760                 switch (tmp) {
1761                 case 0:
1762                 default:
1763                         return 4096 * 2;
1764                 case 2:
1765                         return 8192 * 2;
1766                 }
1767         }
1768
1769         /* controller not enabled, so no lb used */
1770         return 0;
1771 }
1772
1773 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1774 {
1775         u32 tmp = RREG32(MC_SHARED_CHMAP);
1776
1777         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1778         case 0:
1779         default:
1780                 return 1;
1781         case 1:
1782                 return 2;
1783         case 2:
1784                 return 4;
1785         case 3:
1786                 return 8;
1787         case 4:
1788                 return 3;
1789         case 5:
1790                 return 6;
1791         case 6:
1792                 return 10;
1793         case 7:
1794                 return 12;
1795         case 8:
1796                 return 16;
1797         }
1798 }
1799
/* dce6_wm_params - snapshot of the display/memory parameters consumed by
 * the DCE6 watermark helpers below (bandwidth and latency calculations).
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1815
1816 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1817 {
1818         /* Calculate raw DRAM Bandwidth */
1819         fixed20_12 dram_efficiency; /* 0.7 */
1820         fixed20_12 yclk, dram_channels, bandwidth;
1821         fixed20_12 a;
1822
1823         a.full = dfixed_const(1000);
1824         yclk.full = dfixed_const(wm->yclk);
1825         yclk.full = dfixed_div(yclk, a);
1826         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1827         a.full = dfixed_const(10);
1828         dram_efficiency.full = dfixed_const(7);
1829         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1830         bandwidth.full = dfixed_mul(dram_channels, yclk);
1831         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1832
1833         return dfixed_trunc(bandwidth);
1834 }
1835
1836 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1837 {
1838         /* Calculate DRAM Bandwidth and the part allocated to display. */
1839         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1840         fixed20_12 yclk, dram_channels, bandwidth;
1841         fixed20_12 a;
1842
1843         a.full = dfixed_const(1000);
1844         yclk.full = dfixed_const(wm->yclk);
1845         yclk.full = dfixed_div(yclk, a);
1846         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1847         a.full = dfixed_const(10);
1848         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1849         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1850         bandwidth.full = dfixed_mul(dram_channels, yclk);
1851         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1852
1853         return dfixed_trunc(bandwidth);
1854 }
1855
1856 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1857 {
1858         /* Calculate the display Data return Bandwidth */
1859         fixed20_12 return_efficiency; /* 0.8 */
1860         fixed20_12 sclk, bandwidth;
1861         fixed20_12 a;
1862
1863         a.full = dfixed_const(1000);
1864         sclk.full = dfixed_const(wm->sclk);
1865         sclk.full = dfixed_div(sclk, a);
1866         a.full = dfixed_const(10);
1867         return_efficiency.full = dfixed_const(8);
1868         return_efficiency.full = dfixed_div(return_efficiency, a);
1869         a.full = dfixed_const(32);
1870         bandwidth.full = dfixed_mul(a, sclk);
1871         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1872
1873         return dfixed_trunc(bandwidth);
1874 }
1875
1876 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1877 {
1878         return 32;
1879 }
1880
static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, sclk, bandwidth;
	fixed20_12 a, b1, b2;
	u32 min_bandwidth;

	/* display clock (MHz) * half a request's worth of bytes */
	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
	b1.full = dfixed_mul(a, disp_clk);

	/* engine clock (MHz) * a full request's worth of bytes */
	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
	b2.full = dfixed_mul(a, sclk);

	/* 8/10 = 0.8 request efficiency */
	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	/* the DMIF is limited by the slower of the two paths */
	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));

	a.full = dfixed_const(min_bandwidth);
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
1912
1913 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1914 {
1915         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1916         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1917         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1918         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1919
1920         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1921 }
1922
1923 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1924 {
1925         /* Calculate the display mode Average Bandwidth
1926          * DisplayMode should contain the source and destination dimensions,
1927          * timing, etc.
1928          */
1929         fixed20_12 bpp;
1930         fixed20_12 line_time;
1931         fixed20_12 src_width;
1932         fixed20_12 bandwidth;
1933         fixed20_12 a;
1934
1935         a.full = dfixed_const(1000);
1936         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1937         line_time.full = dfixed_div(line_time, a);
1938         bpp.full = dfixed_const(wm->bytes_per_pixel);
1939         src_width.full = dfixed_const(wm->src_width);
1940         bandwidth.full = dfixed_mul(src_width, bpp);
1941         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1942         bandwidth.full = dfixed_div(bandwidth, line_time);
1943
1944         return dfixed_trunc(bandwidth);
1945 }
1946
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide latency for */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling/interlacing needs up to 4 source lines per
	 * destination line, otherwise 2 suffice
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / time to return mc_latency+512 worth of data */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = display clock limited fill rate (bytes/us) */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smaller of the two limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case number of source lines */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the lb refills within the active period, raw latency is the
	 * watermark; otherwise charge the overrun on top
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2009
2010 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2011 {
2012         if (dce6_average_bandwidth(wm) <=
2013             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2014                 return true;
2015         else
2016                 return false;
2017 };
2018
2019 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2020 {
2021         if (dce6_average_bandwidth(wm) <=
2022             (dce6_available_bandwidth(wm) / wm->num_heads))
2023                 return true;
2024         else
2025                 return false;
2026 };
2027
2028 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2029 {
2030         u32 lb_partitions = wm->lb_size / wm->src_width;
2031         u32 line_time = wm->active_time + wm->blank_time;
2032         u32 latency_tolerant_lines;
2033         u32 latency_hiding;
2034         fixed20_12 a;
2035
2036         a.full = dfixed_const(1);
2037         if (wm->vsc.full > a.full)
2038                 latency_tolerant_lines = 1;
2039         else {
2040                 if (lb_partitions <= (wm->vtaps + 1))
2041                         latency_tolerant_lines = 1;
2042                 else
2043                         latency_tolerant_lines = 2;
2044         }
2045
2046         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2047
2048         if (dce6_latency_watermark(wm) <= latency_hiding)
2049                 return true;
2050         else
2051                 return false;
2052 }
2053
/* dce6_program_watermarks - compute and program the display watermarks
 * for one crtc.
 *
 * Builds high-clock and low-clock parameter sets, derives the latency
 * watermarks and priority marks from them, then programs watermark A
 * (high clocks) and watermark B (low clocks) plus the priority registers
 * for the crtc.  The results are also cached on the crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line_time field is 16 bits wide, so clamp to 65535 */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a(ns) * pixclk(MHz) * hsc / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same calculation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2213
2214 void dce6_bandwidth_update(struct radeon_device *rdev)
2215 {
2216         struct drm_display_mode *mode0 = NULL;
2217         struct drm_display_mode *mode1 = NULL;
2218         u32 num_heads = 0, lb_size;
2219         int i;
2220
2221         radeon_update_display_priority(rdev);
2222
2223         for (i = 0; i < rdev->num_crtc; i++) {
2224                 if (rdev->mode_info.crtcs[i]->base.enabled)
2225                         num_heads++;
2226         }
2227         for (i = 0; i < rdev->num_crtc; i += 2) {
2228                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2229                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2230                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2231                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2232                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2233                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2234         }
2235 }
2236
2237 /*
2238  * Core functions
2239  */
2240 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2241 {
2242         const u32 num_tile_mode_states = 32;
2243         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2244
2245         switch (rdev->config.si.mem_row_size_in_kb) {
2246         case 1:
2247                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2248                 break;
2249         case 2:
2250         default:
2251                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2252                 break;
2253         case 4:
2254                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2255                 break;
2256         }
2257
2258         if ((rdev->family == CHIP_TAHITI) ||
2259             (rdev->family == CHIP_PITCAIRN)) {
2260                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2261                         switch (reg_offset) {
2262                         case 0:  /* non-AA compressed depth or any compressed stencil */
2263                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2266                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2267                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2268                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2271                                 break;
2272                         case 1:  /* 2xAA/4xAA compressed depth only */
2273                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2275                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2276                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2277                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2278                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2281                                 break;
2282                         case 2:  /* 8xAA compressed depth only */
2283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2285                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2286                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2287                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2288                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2291                                 break;
2292                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2293                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2295                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2296                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2298                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2300                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2301                                 break;
2302                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2303                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2304                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2305                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2306                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2307                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2308                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2310                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2311                                 break;
2312                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2313                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2315                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2316                                                  TILE_SPLIT(split_equal_to_row_size) |
2317                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2318                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2320                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2321                                 break;
2322                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2323                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2325                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2326                                                  TILE_SPLIT(split_equal_to_row_size) |
2327                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2328                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2330                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2331                                 break;
2332                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2336                                                  TILE_SPLIT(split_equal_to_row_size) |
2337                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2338                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2341                                 break;
2342                         case 8:  /* 1D and 1D Array Surfaces */
2343                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2344                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2345                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2346                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2347                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2348                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2350                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2351                                 break;
2352                         case 9:  /* Displayable maps. */
2353                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2356                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2357                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2358                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2361                                 break;
2362                         case 10:  /* Display 8bpp. */
2363                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2366                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2367                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2368                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2370                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2371                                 break;
2372                         case 11:  /* Display 16bpp. */
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2375                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2376                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2377                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2378                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2381                                 break;
2382                         case 12:  /* Display 32bpp. */
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2386                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2387                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2388                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2391                                 break;
2392                         case 13:  /* Thin. */
2393                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2394                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2395                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2396                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2397                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2398                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2400                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2401                                 break;
2402                         case 14:  /* Thin 8 bpp. */
2403                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2405                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2406                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2407                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2408                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2410                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2411                                 break;
2412                         case 15:  /* Thin 16 bpp. */
2413                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2415                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2416                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2417                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2418                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2420                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2421                                 break;
2422                         case 16:  /* Thin 32 bpp. */
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2426                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2427                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2428                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2431                                 break;
2432                         case 17:  /* Thin 64 bpp. */
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436                                                  TILE_SPLIT(split_equal_to_row_size) |
2437                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2438                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2441                                 break;
2442                         case 21:  /* 8 bpp PRT. */
2443                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2445                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2448                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451                                 break;
2452                         case 22:  /* 16 bpp PRT */
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2458                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2461                                 break;
2462                         case 23:  /* 32 bpp PRT */
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2467                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2468                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2471                                 break;
2472                         case 24:  /* 64 bpp PRT */
2473                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2474                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2475                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2476                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2477                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2478                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2481                                 break;
2482                         case 25:  /* 128 bpp PRT */
2483                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2485                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2487                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2488                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2491                                 break;
2492                         default:
2493                                 gb_tile_moden = 0;
2494                                 break;
2495                         }
2496                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2497                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2498                 }
2499         } else if ((rdev->family == CHIP_VERDE) ||
2500                    (rdev->family == CHIP_OLAND) ||
2501                    (rdev->family == CHIP_HAINAN)) {
2502                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2503                         switch (reg_offset) {
2504                         case 0:  /* non-AA compressed depth or any compressed stencil */
2505                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2508                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2509                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2510                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2513                                 break;
2514                         case 1:  /* 2xAA/4xAA compressed depth only */
2515                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2518                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2520                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2523                                 break;
2524                         case 2:  /* 8xAA compressed depth only */
2525                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2528                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2529                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2530                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2533                                 break;
2534                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2535                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2537                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2538                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2539                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2540                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2542                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2543                                 break;
2544                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2545                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2548                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2549                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2550                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2552                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553                                 break;
2554                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2555                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2558                                                  TILE_SPLIT(split_equal_to_row_size) |
2559                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2560                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2562                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2563                                 break;
2564                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2565                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2567                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568                                                  TILE_SPLIT(split_equal_to_row_size) |
2569                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2570                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573                                 break;
2574                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2575                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2577                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2578                                                  TILE_SPLIT(split_equal_to_row_size) |
2579                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2580                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2582                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2583                                 break;
2584                         case 8:  /* 1D and 1D Array Surfaces */
2585                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2589                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2590                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2592                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2593                                 break;
2594                         case 9:  /* Displayable maps. */
2595                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2596                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2598                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2599                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2600                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603                                 break;
2604                         case 10:  /* Display 8bpp. */
2605                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2610                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2613                                 break;
2614                         case 11:  /* Display 16bpp. */
2615                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2617                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2618                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2619                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2620                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2623                                 break;
2624                         case 12:  /* Display 32bpp. */
2625                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2628                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2629                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2630                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2632                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633                                 break;
2634                         case 13:  /* Thin. */
2635                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2638                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2639                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2640                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2642                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2643                                 break;
2644                         case 14:  /* Thin 8 bpp. */
2645                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2649                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2650                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653                                 break;
2654                         case 15:  /* Thin 16 bpp. */
2655                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2659                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2660                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2662                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2663                                 break;
2664                         case 16:  /* Thin 32 bpp. */
2665                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2668                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2669                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2670                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2673                                 break;
2674                         case 17:  /* Thin 64 bpp. */
2675                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2677                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                                                  TILE_SPLIT(split_equal_to_row_size) |
2679                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2680                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2683                                 break;
2684                         case 21:  /* 8 bpp PRT. */
2685                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2689                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2690                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693                                 break;
2694                         case 22:  /* 16 bpp PRT */
2695                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2700                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2702                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2703                                 break;
2704                         case 23:  /* 32 bpp PRT */
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2709                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2710                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2712                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2713                                 break;
2714                         case 24:  /* 64 bpp PRT */
2715                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2717                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2718                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2719                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2720                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2723                                 break;
2724                         case 25:  /* 128 bpp PRT */
2725                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2727                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2728                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2729                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2730                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2733                                 break;
2734                         default:
2735                                 gb_tile_moden = 0;
2736                                 break;
2737                         }
2738                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2739                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2740                 }
2741         } else
2742                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2743 }
2744
2745 static void si_select_se_sh(struct radeon_device *rdev,
2746                             u32 se_num, u32 sh_num)
2747 {
2748         u32 data = INSTANCE_BROADCAST_WRITES;
2749
2750         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2751                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2752         else if (se_num == 0xffffffff)
2753                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2754         else if (sh_num == 0xffffffff)
2755                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2756         else
2757                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2758         WREG32(GRBM_GFX_INDEX, data);
2759 }
2760
2761 static u32 si_create_bitmask(u32 bit_width)
2762 {
2763         u32 i, mask = 0;
2764
2765         for (i = 0; i < bit_width; i++) {
2766                 mask <<= 1;
2767                 mask |= 1;
2768         }
2769         return mask;
2770 }
2771
2772 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2773 {
2774         u32 data, mask;
2775
2776         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2777         if (data & 1)
2778                 data &= INACTIVE_CUS_MASK;
2779         else
2780                 data = 0;
2781         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2782
2783         data >>= INACTIVE_CUS_SHIFT;
2784
2785         mask = si_create_bitmask(cu_per_sh);
2786
2787         return ~data & mask;
2788 }
2789
2790 static void si_setup_spi(struct radeon_device *rdev,
2791                          u32 se_num, u32 sh_per_se,
2792                          u32 cu_per_sh)
2793 {
2794         int i, j, k;
2795         u32 data, mask, active_cu;
2796
2797         for (i = 0; i < se_num; i++) {
2798                 for (j = 0; j < sh_per_se; j++) {
2799                         si_select_se_sh(rdev, i, j);
2800                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2801                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2802
2803                         mask = 1;
2804                         for (k = 0; k < 16; k++) {
2805                                 mask <<= k;
2806                                 if (active_cu & mask) {
2807                                         data &= ~mask;
2808                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2809                                         break;
2810                                 }
2811                         }
2812                 }
2813         }
2814         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2815 }
2816
2817 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2818                               u32 max_rb_num, u32 se_num,
2819                               u32 sh_per_se)
2820 {
2821         u32 data, mask;
2822
2823         data = RREG32(CC_RB_BACKEND_DISABLE);
2824         if (data & 1)
2825                 data &= BACKEND_DISABLE_MASK;
2826         else
2827                 data = 0;
2828         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2829
2830         data >>= BACKEND_DISABLE_SHIFT;
2831
2832         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2833
2834         return data & mask;
2835 }
2836
/*
 * si_setup_rb - program the raster configuration from the enabled
 * render backends
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num: total number of render backends on the ASIC
 *
 * Gathers the per-SH disabled-RB bitmaps into a global map, inverts it
 * to an enabled map, then writes a PA_SC_RASTER_CONFIG RB mapping for
 * each shader engine based on which of its two RBs per SH are usable.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Collect the harvested-RB bits from every SE/SH pair into one
	 * global bitmap, TAHITI_RB_BITMAP_WIDTH_PER_SH bits per array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* back to broadcast addressing */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: build the bitmap of RBs that are actually usable. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program each SE's RB map; enabled_rbs is consumed two bits
	 * (= one shader array's worth) at a time. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only the first RB of the pair works */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair works */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs (or fallback) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2884
/*
 * si_gpu_init - one-time bring-up of the SI graphics block
 * @rdev: radeon_device pointer
 *
 * Fills rdev->config.si with the per-ASIC limits, derives the
 * GB_ADDR_CONFIG/tile_config values from the memory controller
 * configuration, initializes the tiling tables, RB and SPI setup,
 * and programs the 3D-engine default registers.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader-engine / pipe / cache limits and golden
	 * GB_ADDR_CONFIG values. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* NOTE(review): "default:" is deliberately grouped with
		 * Verde so unknown SI parts get the smallest common
		 * configuration, even though Oland/Hainan follow below. */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP: zero 5 registers for each of the 32 instances
	 * (stride 0x18).  The raw offsets 0x2c14..0x2c24 have no named
	 * defines here. */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column-count field,
	 * capped at 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that walks memory */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no changed bits: keeps reset defaults */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* size the scan-converter FIFOs from the per-ASIC values above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	/* make HDP flushes also invalidate its cache */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the programmed state settle */
	udelay(50);
}
3141
3142 /*
3143  * GPU scratch registers helpers function.
3144  */
3145 static void si_scratch_init(struct radeon_device *rdev)
3146 {
3147         int i;
3148
3149         rdev->scratch.num_reg = 7;
3150         rdev->scratch.reg_base = SCRATCH_REG0;
3151         for (i = 0; i < rdev->scratch.num_reg; i++) {
3152                 rdev->scratch.free[i] = true;
3153                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3154         }
3155 }
3156
/**
 * si_fence_ring_emit - emit a fence on the gfx/compute ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit; selects the ring and sequence number
 *
 * Flushes the GPU read caches over the GART, then emits an
 * EVENT_WRITE_EOP packet that writes @fence->seq to the fence driver's
 * GPU address and raises an interrupt when the pipeline drains.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart: clear CP_COHER_CNTL2 (vmid 0) ... */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	/* ... then sync TC/L1/K$/I$ over the full address range */
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: whole range */
	radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3183
3184 /*
3185  * IB stuff
3186  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: IB to execute; ib->is_const_ib selects the CE (const) path
 *
 * Emits the INDIRECT_BUFFER packet (CONST variant for CE IBs), tagged
 * with the IB's VM id, and for normal IBs flushes the read caches for
 * that vmid afterwards.  The dword counts in next_rptr (3/5 + 4 + 8)
 * must match the number of dwords emitted below them.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_CONFIG_REG + 4 for the IB
			 * packet + 8 for the cache flush below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA + 4 + 8 as above */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap the IB fetch */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE */
		radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3243
3244 /*
3245  * CP.
3246  */
3247 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3248 {
3249         if (enable)
3250                 WREG32(CP_ME_CNTL, 0);
3251         else {
3252                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3253                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3254                 WREG32(SCRATCH_UMSK, 0);
3255                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3256                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3257                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3258         }
3259         udelay(50);
3260 }
3261
3262 static int si_cp_load_microcode(struct radeon_device *rdev)
3263 {
3264         const __be32 *fw_data;
3265         int i;
3266
3267         if (!rdev->me_fw || !rdev->pfp_fw)
3268                 return -EINVAL;
3269
3270         si_cp_enable(rdev, false);
3271
3272         /* PFP */
3273         fw_data = (const __be32 *)rdev->pfp_fw->data;
3274         WREG32(CP_PFP_UCODE_ADDR, 0);
3275         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3276                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3277         WREG32(CP_PFP_UCODE_ADDR, 0);
3278
3279         /* CE */
3280         fw_data = (const __be32 *)rdev->ce_fw->data;
3281         WREG32(CP_CE_UCODE_ADDR, 0);
3282         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3283                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3284         WREG32(CP_CE_UCODE_ADDR, 0);
3285
3286         /* ME */
3287         fw_data = (const __be32 *)rdev->me_fw->data;
3288         WREG32(CP_ME_RAM_WADDR, 0);
3289         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3290                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3291         WREG32(CP_ME_RAM_WADDR, 0);
3292
3293         WREG32(CP_PFP_UCODE_ADDR, 0);
3294         WREG32(CP_CE_UCODE_ADDR, 0);
3295         WREG32(CP_ME_RAM_WADDR, 0);
3296         WREG32(CP_ME_RAM_RADDR, 0);
3297         return 0;
3298 }
3299
3300 static int si_cp_start(struct radeon_device *rdev)
3301 {
3302         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3303         int r, i;
3304
3305         r = radeon_ring_lock(rdev, ring, 7 + 4);
3306         if (r) {
3307                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3308                 return r;
3309         }
3310         /* init the CP */
3311         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3312         radeon_ring_write(ring, 0x1);
3313         radeon_ring_write(ring, 0x0);
3314         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3315         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3316         radeon_ring_write(ring, 0);
3317         radeon_ring_write(ring, 0);
3318
3319         /* init the CE partitions */
3320         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3321         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3322         radeon_ring_write(ring, 0xc000);
3323         radeon_ring_write(ring, 0xe000);
3324         radeon_ring_unlock_commit(rdev, ring);
3325
3326         si_cp_enable(rdev, true);
3327
3328         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3329         if (r) {
3330                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3331                 return r;
3332         }
3333
3334         /* setup clear context state */
3335         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3336         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3337
3338         for (i = 0; i < si_default_size; i++)
3339                 radeon_ring_write(ring, si_default_state[i]);
3340
3341         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3342         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3343
3344         /* set clear context state */
3345         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3346         radeon_ring_write(ring, 0);
3347
3348         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3349         radeon_ring_write(ring, 0x00000316);
3350         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3351         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3352
3353         radeon_ring_unlock_commit(rdev, ring);
3354
3355         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3356                 ring = &rdev->ring[i];
3357                 r = radeon_ring_lock(rdev, ring, 2);
3358
3359                 /* clear the compute context state */
3360                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3361                 radeon_ring_write(ring, 0);
3362
3363                 radeon_ring_unlock_commit(rdev, ring);
3364         }
3365
3366         return 0;
3367 }
3368
3369 static void si_cp_fini(struct radeon_device *rdev)
3370 {
3371         struct radeon_ring *ring;
3372         si_cp_enable(rdev, false);
3373
3374         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3375         radeon_ring_fini(rdev, ring);
3376         radeon_scratch_free(rdev, ring->rptr_save_reg);
3377
3378         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3379         radeon_ring_fini(rdev, ring);
3380         radeon_scratch_free(rdev, ring->rptr_save_reg);
3381
3382         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3383         radeon_ring_fini(rdev, ring);
3384         radeon_scratch_free(rdev, ring->rptr_save_reg);
3385 }
3386
/**
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the gfx ring (ring 0) and both compute rings (rings 1/2):
 * ring buffer size, rptr writeback address, write pointer reset and
 * ring base, then calls si_cp_start() and ring-tests each ring.
 *
 * Returns 0 on success.  A gfx ring test failure marks all three
 * rings not ready and returns the error; a compute ring test failure
 * only marks that ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* keep GUI idle interrupts off while the rings are reprogrammed */
	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: don't let the CP update rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal; take everything down */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are not fatal; just mark them unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3515
/**
 * si_gpu_check_soft_reset - determine which GPU blocks are busy/hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM/SRBM/DMA/VM status registers and translates their
 * busy bits into a mask of RADEON_RESET_* flags describing which
 * blocks would need a soft reset.  A pending MC reset is dropped on
 * purpose: a busy MC is most likely just busy, not hung.
 *
 * Returns the reset mask (0 if nothing appears hung).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3596
/**
 * si_gpu_soft_reset - soft reset the blocks selected by @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags, typically obtained from
 *              si_gpu_check_soft_reset()
 *
 * Disables PG/CG and the RLC, halts the CP and the selected DMA
 * engines, stops the MC, pulses the required GRBM/SRBM soft-reset
 * bits (set, delay, clear, with read-backs to post the writes), then
 * restores the MC.  A no-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3728
3729 int si_asic_reset(struct radeon_device *rdev)
3730 {
3731         u32 reset_mask;
3732
3733         reset_mask = si_gpu_check_soft_reset(rdev);
3734
3735         if (reset_mask)
3736                 r600_set_bios_scratch_engine_hung(rdev, true);
3737
3738         si_gpu_soft_reset(rdev, reset_mask);
3739
3740         reset_mask = si_gpu_check_soft_reset(rdev);
3741
3742         if (!reset_mask)
3743                 r600_set_bios_scratch_engine_hung(rdev, false);
3744
3745         return 0;
3746 }
3747
3748 /**
3749  * si_gfx_is_lockup - Check if the GFX engine is locked up
3750  *
3751  * @rdev: radeon_device pointer
3752  * @ring: radeon_ring structure holding ring information
3753  *
3754  * Check if the GFX engine is locked up.
3755  * Returns true if the engine appears to be locked up, false if not.
3756  */
3757 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3758 {
3759         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3760
3761         if (!(reset_mask & (RADEON_RESET_GFX |
3762                             RADEON_RESET_COMPUTE |
3763                             RADEON_RESET_CP))) {
3764                 radeon_ring_lockup_update(ring);
3765                 return false;
3766         }
3767         /* force CP activities */
3768         radeon_ring_force_activity(rdev, ring);
3769         return radeon_ring_test_lockup(rdev, ring);
3770 }
3771
3772 /* MC */
/**
 * si_mc_program - program the memory controller address space layout
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the HDP registers, stops the MC (with display saved),
 * programs the system aperture and FB location from rdev->mc, sets
 * the HDP non-surface range, disables the AGP aperture, and resumes
 * the MC.  VGA access is locked out so the VGA renderer cannot
 * overwrite our objects.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the upper half-word, start in the lower */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3823
3824 void si_vram_gtt_location(struct radeon_device *rdev,
3825                           struct radeon_mc *mc)
3826 {
3827         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3828                 /* leave room for at least 1024M GTT */
3829                 dev_warn(rdev->dev, "limiting VRAM\n");
3830                 mc->real_vram_size = 0xFFC0000000ULL;
3831                 mc->mc_vram_size = 0xFFC0000000ULL;
3832         }
3833         radeon_vram_location(rdev, &rdev->mc, 0);
3834         rdev->mc.gtt_base_align = 0;
3835         radeon_gtt_location(rdev, mc);
3836 }
3837
3838 static int si_mc_init(struct radeon_device *rdev)
3839 {
3840         u32 tmp;
3841         int chansize, numchan;
3842
3843         /* Get VRAM informations */
3844         rdev->mc.vram_is_ddr = true;
3845         tmp = RREG32(MC_ARB_RAMCFG);
3846         if (tmp & CHANSIZE_OVERRIDE) {
3847                 chansize = 16;
3848         } else if (tmp & CHANSIZE_MASK) {
3849                 chansize = 64;
3850         } else {
3851                 chansize = 32;
3852         }
3853         tmp = RREG32(MC_SHARED_CHMAP);
3854         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3855         case 0:
3856         default:
3857                 numchan = 1;
3858                 break;
3859         case 1:
3860                 numchan = 2;
3861                 break;
3862         case 2:
3863                 numchan = 4;
3864                 break;
3865         case 3:
3866                 numchan = 8;
3867                 break;
3868         case 4:
3869                 numchan = 3;
3870                 break;
3871         case 5:
3872                 numchan = 6;
3873                 break;
3874         case 6:
3875                 numchan = 10;
3876                 break;
3877         case 7:
3878                 numchan = 12;
3879                 break;
3880         case 8:
3881                 numchan = 16;
3882                 break;
3883         }
3884         rdev->mc.vram_width = numchan * chansize;
3885         /* Could aper size report 0 ? */
3886         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3887         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3888         /* size in MB on si */
3889         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3890         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3891         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3892         si_vram_gtt_location(rdev, &rdev->mc);
3893         radeon_update_bandwidth_info(rdev);
3894
3895         return 0;
3896 }
3897
3898 /*
3899  * GART
3900  */
/**
 * si_pcie_gart_tlb_flush - flush the VM TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then requests a TLB invalidate for VM
 * context 0 (bit 0 of VM_INVALIDATE_REQUEST).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3909
/**
 * si_pcie_gart_enable - program and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, configures the L1 TLB and L2
 * cache, programs VM context 0 to cover the GTT aperture backed by
 * the GART table, points contexts 1-15 at the same table for now
 * (their real page tables are assigned on the fly in radeon_gart.c),
 * enables fault reporting for them, and flushes the TLB.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context 0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3994
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, turns off the L1 TLB and the L2 cache
 * enable bits, and unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4013
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, then frees the page table and the GART state.
 * Order matters: the hardware must be disabled before the table is
 * freed.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4020
4021 /* vm parser */
4022 static bool si_vm_reg_valid(u32 reg)
4023 {
4024         /* context regs are fine */
4025         if (reg >= 0x28000)
4026                 return true;
4027
4028         /* check config regs */
4029         switch (reg) {
4030         case GRBM_GFX_INDEX:
4031         case CP_STRMOUT_CNTL:
4032         case VGT_VTX_VECT_EJECT_REG:
4033         case VGT_CACHE_INVALIDATION:
4034         case VGT_ESGS_RING_SIZE:
4035         case VGT_GSVS_RING_SIZE:
4036         case VGT_GS_VERTEX_REUSE:
4037         case VGT_PRIMITIVE_TYPE:
4038         case VGT_INDEX_TYPE:
4039         case VGT_NUM_INDICES:
4040         case VGT_NUM_INSTANCES:
4041         case VGT_TF_RING_SIZE:
4042         case VGT_HS_OFFCHIP_PARAM:
4043         case VGT_TF_MEMORY_BASE:
4044         case PA_CL_ENHANCE:
4045         case PA_SU_LINE_STIPPLE_VALUE:
4046         case PA_SC_LINE_STIPPLE_STATE:
4047         case PA_SC_ENHANCE:
4048         case SQC_CACHES:
4049         case SPI_STATIC_THREAD_MGMT_1:
4050         case SPI_STATIC_THREAD_MGMT_2:
4051         case SPI_STATIC_THREAD_MGMT_3:
4052         case SPI_PS_MAX_WAVE_ID:
4053         case SPI_CONFIG_CNTL:
4054         case SPI_CONFIG_CNTL_1:
4055         case TA_CNTL_AUX:
4056                 return true;
4057         default:
4058                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4059                 return false;
4060         }
4061 }
4062
4063 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4064                                   u32 *ib, struct radeon_cs_packet *pkt)
4065 {
4066         switch (pkt->opcode) {
4067         case PACKET3_NOP:
4068         case PACKET3_SET_BASE:
4069         case PACKET3_SET_CE_DE_COUNTERS:
4070         case PACKET3_LOAD_CONST_RAM:
4071         case PACKET3_WRITE_CONST_RAM:
4072         case PACKET3_WRITE_CONST_RAM_OFFSET:
4073         case PACKET3_DUMP_CONST_RAM:
4074         case PACKET3_INCREMENT_CE_COUNTER:
4075         case PACKET3_WAIT_ON_DE_COUNTER:
4076         case PACKET3_CE_WRITE:
4077                 break;
4078         default:
4079                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4080                 return -EINVAL;
4081         }
4082         return 0;
4083 }
4084
4085 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4086 {
4087         u32 start_reg, reg, i;
4088         u32 command = ib[idx + 4];
4089         u32 info = ib[idx + 1];
4090         u32 idx_value = ib[idx];
4091         if (command & PACKET3_CP_DMA_CMD_SAS) {
4092                 /* src address space is register */
4093                 if (((info & 0x60000000) >> 29) == 0) {
4094                         start_reg = idx_value << 2;
4095                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4096                                 reg = start_reg;
4097                                 if (!si_vm_reg_valid(reg)) {
4098                                         DRM_ERROR("CP DMA Bad SRC register\n");
4099                                         return -EINVAL;
4100                                 }
4101                         } else {
4102                                 for (i = 0; i < (command & 0x1fffff); i++) {
4103                                         reg = start_reg + (4 * i);
4104                                         if (!si_vm_reg_valid(reg)) {
4105                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4106                                                 return -EINVAL;
4107                                         }
4108                                 }
4109                         }
4110                 }
4111         }
4112         if (command & PACKET3_CP_DMA_CMD_DAS) {
4113                 /* dst address space is register */
4114                 if (((info & 0x00300000) >> 20) == 0) {
4115                         start_reg = ib[idx + 2];
4116                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4117                                 reg = start_reg;
4118                                 if (!si_vm_reg_valid(reg)) {
4119                                         DRM_ERROR("CP DMA Bad DST register\n");
4120                                         return -EINVAL;
4121                                 }
4122                         } else {
4123                                 for (i = 0; i < (command & 0x1fffff); i++) {
4124                                         reg = start_reg + (4 * i);
4125                                 if (!si_vm_reg_valid(reg)) {
4126                                                 DRM_ERROR("CP DMA Bad DST register\n");
4127                                                 return -EINVAL;
4128                                         }
4129                                 }
4130                         }
4131                 }
4132         }
4133         return 0;
4134 }
4135
/**
 * si_vm_packet3_gfx_check - validate a gfx-ring PACKET3 from a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: the IB dwords
 * @pkt: decoded packet header (idx/count/opcode)
 *
 * Opcodes in the first group carry no register writes that need vetting
 * and pass unchanged; the remaining opcodes can target registers and
 * have each destination checked with si_vm_reg_valid().  Returns 0 if
 * the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that are safe without further inspection */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field 0 appears to select register space — only
		 * then does the destination need validating */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field 0 appears to select register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* write-one-address mode: every dword hits
				 * the same register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* count - 2 payload dwords follow the
				 * flags + address header */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: the conditional write targets a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: the copy destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* the written range must lie entirely inside the config
		 * register window, and each register must be allowed */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4253
/**
 * si_vm_packet3_compute_check - validate a compute-ring PACKET3 from a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: the IB dwords
 * @pkt: decoded packet header (idx/count/opcode)
 *
 * Same structure as si_vm_packet3_gfx_check() but with the smaller
 * opcode allowlist permitted on the compute (CP1/CP2) rings.  Returns 0
 * if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that are safe without further inspection */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field 0 appears to select register space */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field 0 appears to select register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* write-one-address mode: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* count - 2 payload dwords follow the header */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: the conditional write targets a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: the copy destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4341
4342 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4343 {
4344         int ret = 0;
4345         u32 idx = 0;
4346         struct radeon_cs_packet pkt;
4347
4348         do {
4349                 pkt.idx = idx;
4350                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4351                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4352                 pkt.one_reg_wr = 0;
4353                 switch (pkt.type) {
4354                 case RADEON_PACKET_TYPE0:
4355                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4356                         ret = -EINVAL;
4357                         break;
4358                 case RADEON_PACKET_TYPE2:
4359                         idx += 1;
4360                         break;
4361                 case RADEON_PACKET_TYPE3:
4362                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4363                         if (ib->is_const_ib)
4364                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4365                         else {
4366                                 switch (ib->ring) {
4367                                 case RADEON_RING_TYPE_GFX_INDEX:
4368                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4369                                         break;
4370                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4371                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4372                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4373                                         break;
4374                                 default:
4375                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4376                                         ret = -EINVAL;
4377                                         break;
4378                                 }
4379                         }
4380                         idx += pkt.count + 2;
4381                         break;
4382                 default:
4383                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4384                         ret = -EINVAL;
4385                         break;
4386                 }
4387                 if (ret)
4388                         break;
4389         } while (idx < ib->length_dw);
4390
4391         return ret;
4392 }
4393
4394 /*
4395  * vm
4396  */
4397 int si_vm_init(struct radeon_device *rdev)
4398 {
4399         /* number of VMs */
4400         rdev->vm_manager.nvm = 16;
4401         /* base offset of vram pages */
4402         rdev->vm_manager.vram_base_offset = 0;
4403
4404         return 0;
4405 }
4406
/* si_vm_fini - VM manager teardown for SI.
 * Nothing to do: si_vm_init() allocates no resources.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4410
4411 /**
4412  * si_vm_decode_fault - print human readable fault info
4413  *
4414  * @rdev: radeon_device pointer
4415  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4416  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4417  *
4418  * Print human readable fault information (SI).
4419  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* unpack the fault status fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* Tahiti uses a different MC client-id -> block mapping than the
	 * other SI parts (extra TC clients, a second VGT id, etc.) */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan client-id mapping */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* addr is the faulting page number, not a byte address */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4674
4675 /**
4676  * si_vm_set_page - update the page tables using the CP
4677  *
4678  * @rdev: radeon_device pointer
4679  * @ib: indirect buffer to fill with commands
4680  * @pe: addr of the page entry
4681  * @addr: dst addr to write into pe
4682  * @count: number of page entries to update
4683  * @incr: increase next addr by incr bytes
4684  * @flags: access flags
4685  *
4686  * Update the page tables using the CP (SI).
4687  */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* update the page tables with CP WRITE_DATA packets,
		 * splitting into multiple packets when the entry count
		 * exceeds one packet's payload */
		while (count) {
			/* 2 dwords for the pe address + 2 dwords (lo/hi)
			 * per entry, capped at the packet size limit */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			/* each iteration emits one 8-byte PTE and advances
			 * pe/count accordingly */
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the
					 * GART and mask to page granularity */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					/* VRAM page: address used directly */
					value = addr;
				} else {
					/* invalid entry */
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
4729
/* si_vm_flush - emit a VM flush on the given ring (SI).
 *
 * Points the VM context's page directory at the new address, flushes
 * the HDP cache and invalidates the TLB for this VM id.  The packet
 * order matters: the base address must land before the invalidate.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4772
4773 /*
4774  *  Power and clock gating
4775  */
4776 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4777 {
4778         int i;
4779
4780         for (i = 0; i < rdev->usec_timeout; i++) {
4781                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4782                         break;
4783                 udelay(1);
4784         }
4785
4786         for (i = 0; i < rdev->usec_timeout; i++) {
4787                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4788                         break;
4789                 udelay(1);
4790         }
4791 }
4792
4793 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4794                                          bool enable)
4795 {
4796         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4797         u32 mask;
4798         int i;
4799
4800         if (enable)
4801                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4802         else
4803                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4804         WREG32(CP_INT_CNTL_RING0, tmp);
4805
4806         if (!enable) {
4807                 /* read a gfx register */
4808                 tmp = RREG32(DB_DEPTH_INFO);
4809
4810                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4811                 for (i = 0; i < rdev->usec_timeout; i++) {
4812                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4813                                 break;
4814                         udelay(1);
4815                 }
4816         }
4817 }
4818
4819 static void si_set_uvd_dcm(struct radeon_device *rdev,
4820                            bool sw_mode)
4821 {
4822         u32 tmp, tmp2;
4823
4824         tmp = RREG32(UVD_CGC_CTRL);
4825         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4826         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4827
4828         if (sw_mode) {
4829                 tmp &= ~0x7ffff800;
4830                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4831         } else {
4832                 tmp |= 0x7ffff800;
4833                 tmp2 = 0;
4834         }
4835
4836         WREG32(UVD_CGC_CTRL, tmp);
4837         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4838 }
4839
/* si_init_uvd_internal_cg - initialize UVD internal clock gating (SI).
 * hw_mode is hardwired to true, so the else-branch (forcing DCM off)
 * is currently dead code kept around as a manual-tuning escape hatch.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		/* hardware-driven dynamic clock mode */
		si_set_uvd_dcm(rdev, false);
	} else {
		/* dead path: disable DCM entirely */
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4852
4853 static u32 si_halt_rlc(struct radeon_device *rdev)
4854 {
4855         u32 data, orig;
4856
4857         orig = data = RREG32(RLC_CNTL);
4858
4859         if (data & RLC_ENABLE) {
4860                 data &= ~RLC_ENABLE;
4861                 WREG32(RLC_CNTL, data);
4862
4863                 si_wait_for_rlc_serdes(rdev);
4864         }
4865
4866         return orig;
4867 }
4868
4869 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4870 {
4871         u32 tmp;
4872
4873         tmp = RREG32(RLC_CNTL);
4874         if (tmp != rlc)
4875                 WREG32(RLC_CNTL, rlc);
4876 }
4877
4878 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4879 {
4880         u32 data, orig;
4881
4882         orig = data = RREG32(DMA_PG);
4883         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4884                 data |= PG_CNTL_ENABLE;
4885         else
4886                 data &= ~PG_CNTL_ENABLE;
4887         if (orig != data)
4888                 WREG32(DMA_PG, data);
4889 }
4890
4891 static void si_init_dma_pg(struct radeon_device *rdev)
4892 {
4893         u32 tmp;
4894
4895         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4896         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4897
4898         for (tmp = 0; tmp < 5; tmp++)
4899                 WREG32(DMA_PGFSM_WRITE, 0);
4900 }
4901
/* Enable or disable gfx powergating (SI); the enable only takes effect
 * when the platform advertises RADEON_PG_SUPPORT_GFX_PG. */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* NOTE(review): PUD/PDD/TTPD/MSD look like RLC powergating
		 * delay thresholds — confirm against the RLC docs */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		/* turn on gfx powergating */
		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		/* let the RLC trigger it automatically */
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* NOTE(review): result is discarded — presumably a dummy gfx
		 * read to make sure the block is awake; confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4926
4927 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4928 {
4929         u32 tmp;
4930
4931         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4932
4933         tmp = RREG32(RLC_PG_CNTL);
4934         tmp |= GFX_PG_SRC;
4935         WREG32(RLC_PG_CNTL, tmp);
4936
4937         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4938
4939         tmp = RREG32(RLC_AUTO_PG_CTRL);
4940
4941         tmp &= ~GRBM_REG_SGIT_MASK;
4942         tmp |= GRBM_REG_SGIT(0x700);
4943         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
4944         WREG32(RLC_AUTO_PG_CTRL, tmp);
4945 }
4946
4947 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4948 {
4949         u32 mask = 0, tmp, tmp1;
4950         int i;
4951
4952         si_select_se_sh(rdev, se, sh);
4953         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4954         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4955         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4956
4957         tmp &= 0xffff0000;
4958
4959         tmp |= tmp1;
4960         tmp >>= 16;
4961
4962         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4963                 mask <<= 1;
4964                 mask |= 1;
4965         }
4966
4967         return (~tmp) & mask;
4968 }
4969
4970 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4971 {
4972         u32 i, j, k, active_cu_number = 0;
4973         u32 mask, counter, cu_bitmap;
4974         u32 tmp = 0;
4975
4976         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4977                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4978                         mask = 1;
4979                         cu_bitmap = 0;
4980                         counter  = 0;
4981                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4982                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4983                                         if (counter < 2)
4984                                                 cu_bitmap |= mask;
4985                                         counter++;
4986                                 }
4987                                 mask <<= 1;
4988                         }
4989
4990                         active_cu_number += counter;
4991                         tmp |= (cu_bitmap << (i * 16 + j * 8));
4992                 }
4993         }
4994
4995         WREG32(RLC_PG_AO_CU_MASK, tmp);
4996
4997         tmp = RREG32(RLC_MAX_PG_CU);
4998         tmp &= ~MAX_PU_CU_MASK;
4999         tmp |= MAX_PU_CU(active_cu_number);
5000         WREG32(RLC_MAX_PG_CU, tmp);
5001 }
5002
/**
 * si_enable_cgcg - enable/disable coarse grain clock gating for gfx
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Toggles the CGCG/CGLS enable bits in RLC_CGCG_CGLS_CTRL; when
 * enabling, the RLC is halted while the serdes links are reprogrammed.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                si_enable_gui_idle_interrupt(rdev, true);

                WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

                /* halt the RLC while touching the serdes; si_update_rlc()
                 * below restores its previous state */
                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

                si_wait_for_rlc_serdes(rdev);

                si_update_rlc(rdev, tmp);

                WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

                data |= CGCG_EN | CGLS_EN;
        } else {
                si_enable_gui_idle_interrupt(rdev, false);

                /* repeated reads of CB_CGTT_SCLK_CTRL — NOTE(review):
                 * presumably posting reads to settle CB clock gating
                 * before clearing the enable bits; confirm */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        /* avoid a redundant MMIO write if nothing changed */
        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5042
/**
 * si_enable_mgcg - enable/disable medium grain clock gating for gfx
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Programs CGTS/CP/RLC override registers and reprograms the RLC
 * serdes (with the RLC halted) to toggle medium grain clock gating.
 * The statement order follows the required hw programming sequence.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                /* NOTE(review): magic CGTS configuration value — confirm
                 * against hw documentation */
                data = 0x96940200;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                /* optionally let the CP memories go to light sleep */
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                        orig = data = RREG32(CP_MEM_SLP_CNTL);
                        data |= CP_MEM_LS_EN;
                        if (orig != data)
                                WREG32(CP_MEM_SLP_CNTL, data);
                }

                /* clear the low MGCG override bits */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xffffffc0;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

                si_update_rlc(rdev, tmp);
        } else {
                /* disable: force the overrides on, wake the CP memories,
                 * then reprogram the serdes */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= LS_OVERRIDE | OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

                si_update_rlc(rdev, tmp);
        }
}
5098
/**
 * si_enable_uvd_mgcg - enable/disable medium grain clock gating for UVD
 * @rdev: radeon_device pointer
 * @enable: enable or disable
 *
 * Toggles the UVD memory clock gating bits, the DCM bit in
 * UVD_CGC_CTRL, and the SMC-side CGTT local registers (all-ones
 * disables gating there, zero enables it).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
                               bool enable)
{
        u32 orig, data, tmp;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp |= 0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
        } else {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp &= ~0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
        }
}
5130
/* Memory-controller clock gating registers; si_enable_mc_ls() and
 * si_enable_mc_mgcg() toggle the MC_LS_ENABLE / MC_CG_ENABLE bits in
 * each of these. */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
5143
5144 static void si_enable_mc_ls(struct radeon_device *rdev,
5145                             bool enable)
5146 {
5147         int i;
5148         u32 orig, data;
5149
5150         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5151                 orig = data = RREG32(mc_cg_registers[i]);
5152                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5153                         data |= MC_LS_ENABLE;
5154                 else
5155                         data &= ~MC_LS_ENABLE;
5156                 if (data != orig)
5157                         WREG32(mc_cg_registers[i], data);
5158         }
5159 }
5160
5161 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5162                                bool enable)
5163 {
5164         int i;
5165         u32 orig, data;
5166
5167         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5168                 orig = data = RREG32(mc_cg_registers[i]);
5169                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5170                         data |= MC_CG_ENABLE;
5171                 else
5172                         data &= ~MC_CG_ENABLE;
5173                 if (data != orig)
5174                         WREG32(mc_cg_registers[i], data);
5175         }
5176 }
5177
5178 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5179                                bool enable)
5180 {
5181         u32 orig, data, offset;
5182         int i;
5183
5184         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5185                 for (i = 0; i < 2; i++) {
5186                         if (i == 0)
5187                                 offset = DMA0_REGISTER_OFFSET;
5188                         else
5189                                 offset = DMA1_REGISTER_OFFSET;
5190                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5191                         data &= ~MEM_POWER_OVERRIDE;
5192                         if (data != orig)
5193                                 WREG32(DMA_POWER_CNTL + offset, data);
5194                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5195                 }
5196         } else {
5197                 for (i = 0; i < 2; i++) {
5198                         if (i == 0)
5199                                 offset = DMA0_REGISTER_OFFSET;
5200                         else
5201                                 offset = DMA1_REGISTER_OFFSET;
5202                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5203                         data |= MEM_POWER_OVERRIDE;
5204                         if (data != orig)
5205                                 WREG32(DMA_POWER_CNTL + offset, data);
5206
5207                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5208                         data = 0xff000000;
5209                         if (data != orig)
5210                                 WREG32(DMA_CLK_CTRL + offset, data);
5211                 }
5212         }
5213 }
5214
5215 static void si_enable_bif_mgls(struct radeon_device *rdev,
5216                                bool enable)
5217 {
5218         u32 orig, data;
5219
5220         orig = data = RREG32_PCIE(PCIE_CNTL2);
5221
5222         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5223                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5224                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5225         else
5226                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5227                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5228
5229         if (orig != data)
5230                 WREG32_PCIE(PCIE_CNTL2, data);
5231 }
5232
5233 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5234                                bool enable)
5235 {
5236         u32 orig, data;
5237
5238         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5239
5240         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5241                 data &= ~CLOCK_GATING_DIS;
5242         else
5243                 data |= CLOCK_GATING_DIS;
5244
5245         if (orig != data)
5246                 WREG32(HDP_HOST_PATH_CNTL, data);
5247 }
5248
5249 static void si_enable_hdp_ls(struct radeon_device *rdev,
5250                              bool enable)
5251 {
5252         u32 orig, data;
5253
5254         orig = data = RREG32(HDP_MEM_POWER_LS);
5255
5256         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5257                 data |= HDP_LS_ENABLE;
5258         else
5259                 data &= ~HDP_LS_ENABLE;
5260
5261         if (orig != data)
5262                 WREG32(HDP_MEM_POWER_LS, data);
5263 }
5264
/**
 * si_update_cg - enable/disable clock gating for the selected IP blocks
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 */
void si_update_cg(struct radeon_device *rdev,
                  u32 block, bool enable)
{
        if (block & RADEON_CG_BLOCK_GFX) {
                /* keep the GUI idle interrupt off while gfx gating is
                 * reprogrammed */
                si_enable_gui_idle_interrupt(rdev, false);
                /* order matters!  MGCG before CGCG when enabling,
                 * reverse order when disabling */
                if (enable) {
                        si_enable_mgcg(rdev, true);
                        si_enable_cgcg(rdev, true);
                } else {
                        si_enable_cgcg(rdev, false);
                        si_enable_mgcg(rdev, false);
                }
                si_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                si_enable_mc_mgcg(rdev, enable);
                si_enable_mc_ls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                si_enable_dma_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                si_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                /* UVD gating only applies to asics that have the block */
                if (rdev->has_uvd) {
                        si_enable_uvd_mgcg(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                si_enable_hdp_mgcg(rdev, enable);
                si_enable_hdp_ls(rdev, enable);
        }
}
5305
5306 static void si_init_cg(struct radeon_device *rdev)
5307 {
5308         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5309                             RADEON_CG_BLOCK_MC |
5310                             RADEON_CG_BLOCK_SDMA |
5311                             RADEON_CG_BLOCK_BIF |
5312                             RADEON_CG_BLOCK_HDP), true);
5313         if (rdev->has_uvd) {
5314                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5315                 si_init_uvd_internal_cg(rdev);
5316         }
5317 }
5318
5319 static void si_fini_cg(struct radeon_device *rdev)
5320 {
5321         if (rdev->has_uvd) {
5322                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5323         }
5324         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5325                             RADEON_CG_BLOCK_MC |
5326                             RADEON_CG_BLOCK_SDMA |
5327                             RADEON_CG_BLOCK_BIF |
5328                             RADEON_CG_BLOCK_HDP), false);
5329 }
5330
5331 u32 si_get_csb_size(struct radeon_device *rdev)
5332 {
5333         u32 count = 0;
5334         const struct cs_section_def *sect = NULL;
5335         const struct cs_extent_def *ext = NULL;
5336
5337         if (rdev->rlc.cs_data == NULL)
5338                 return 0;
5339
5340         /* begin clear state */
5341         count += 2;
5342         /* context control state */
5343         count += 3;
5344
5345         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5346                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5347                         if (sect->id == SECT_CONTEXT)
5348                                 count += 2 + ext->reg_count;
5349                         else
5350                                 return 0;
5351                 }
5352         }
5353         /* pa_sc_raster_config */
5354         count += 3;
5355         /* end clear state */
5356         count += 2;
5357         /* clear state */
5358         count += 2;
5359
5360         return count;
5361 }
5362
/**
 * si_get_csb_buffer - fill a buffer with the clear state command stream
 * @rdev: radeon_device pointer
 * @buffer: destination; must hold at least si_get_csb_size() dwords
 *
 * Emits the PM4 packet stream that loads the golden context register
 * state: preamble begin, context control, every SECT_CONTEXT register
 * extent, the per-asic PA_SC_RASTER_CONFIG value, preamble end and a
 * final CLEAR_STATE packet.  Must stay in sync with si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (rdev->rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
        buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

        buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
        buffer[count++] = 0x80000000;
        buffer[count++] = 0x80000000;

        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                /* SET_CONTEXT_REG offsets are relative to 0xa000 */
                                buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
                                buffer[count++] = ext->reg_index - 0xa000;
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = ext->extent[i];
                        } else {
                                /* only SECT_CONTEXT sections are supported */
                                return;
                        }
                }
        }

        /* per-asic raster configuration */
        buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
        switch (rdev->family) {
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
                buffer[count++] = 0x2a00126a;
                break;
        case CHIP_VERDE:
                buffer[count++] = 0x0000124a;
                break;
        case CHIP_OLAND:
                buffer[count++] = 0x00000082;
                break;
        case CHIP_HAINAN:
                buffer[count++] = 0x00000000;
                break;
        default:
                buffer[count++] = 0x00000000;
                break;
        }

        buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
        buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

        buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
        buffer[count++] = 0;
}
5421
5422 static void si_init_pg(struct radeon_device *rdev)
5423 {
5424         if (rdev->pg_flags) {
5425                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5426                         si_init_dma_pg(rdev);
5427                 }
5428                 si_init_ao_cu_mask(rdev);
5429                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5430                         si_init_gfx_cgpg(rdev);
5431                 }
5432                 si_enable_dma_pg(rdev, true);
5433                 si_enable_gfx_cgpg(rdev, true);
5434         } else {
5435                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5436                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5437         }
5438 }
5439
5440 static void si_fini_pg(struct radeon_device *rdev)
5441 {
5442         if (rdev->pg_flags) {
5443                 si_enable_dma_pg(rdev, false);
5444                 si_enable_gfx_cgpg(rdev, false);
5445         }
5446 }
5447
5448 /*
5449  * RLC
5450  */
/**
 * si_rlc_reset - soft reset the RLC
 * @rdev: radeon_device pointer
 *
 * Pulses the RLC soft-reset bit in GRBM_SOFT_RESET, delaying briefly
 * after both assert and deassert to let the reset settle.
 */
void si_rlc_reset(struct radeon_device *rdev)
{
        u32 tmp = RREG32(GRBM_SOFT_RESET);

        tmp |= SOFT_RESET_RLC;
        WREG32(GRBM_SOFT_RESET, tmp);
        udelay(50);
        tmp &= ~SOFT_RESET_RLC;
        WREG32(GRBM_SOFT_RESET, tmp);
        udelay(50);
}
5462
/**
 * si_rlc_stop - halt the RLC
 * @rdev: radeon_device pointer
 *
 * Disables the RLC, masks the GUI idle interrupt and waits for the
 * RLC serdes to go idle before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, 0);

        si_enable_gui_idle_interrupt(rdev, false);

        si_wait_for_rlc_serdes(rdev);
}
5471
/**
 * si_rlc_start - start the RLC
 * @rdev: radeon_device pointer
 *
 * Enables the RLC and the GUI idle interrupt, then delays briefly to
 * give the RLC time to come up.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, RLC_ENABLE);

        si_enable_gui_idle_interrupt(rdev, true);

        udelay(50);
}
5480
5481 static bool si_lbpw_supported(struct radeon_device *rdev)
5482 {
5483         u32 tmp;
5484
5485         /* Enable LBPW only for DDR3 */
5486         tmp = RREG32(MC_SEQ_MISC0);
5487         if ((tmp & 0xF0000000) == 0xB0000000)
5488                 return true;
5489         return false;
5490 }
5491
5492 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5493 {
5494         u32 tmp;
5495
5496         tmp = RREG32(RLC_LB_CNTL);
5497         if (enable)
5498                 tmp |= LOAD_BALANCE_ENABLE;
5499         else
5500                 tmp &= ~LOAD_BALANCE_ENABLE;
5501         WREG32(RLC_LB_CNTL, tmp);
5502
5503         if (!enable) {
5504                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5505                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5506         }
5507 }
5508
/**
 * si_rlc_resume - stop, reset, reprogram and restart the RLC
 * @rdev: radeon_device pointer
 *
 * Loads the RLC microcode and reinitializes power/clock gating.
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
        u32 i;
        const __be32 *fw_data;

        if (!rdev->rlc_fw)
                return -EINVAL;

        si_rlc_stop(rdev);

        si_rlc_reset(rdev);

        si_init_pg(rdev);

        si_init_cg(rdev);

        /* clear ring/load-balance state before loading the ucode */
        WREG32(RLC_RL_BASE, 0);
        WREG32(RLC_RL_SIZE, 0);
        WREG32(RLC_LB_CNTL, 0);
        WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
        WREG32(RLC_LB_CNTR_INIT, 0);
        WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

        WREG32(RLC_MC_CNTL, 0);
        WREG32(RLC_UCODE_CNTL, 0);

        /* the firmware image is big-endian; write it word by word */
        fw_data = (const __be32 *)rdev->rlc_fw->data;
        for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
                WREG32(RLC_UCODE_ADDR, i);
                WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
        }
        WREG32(RLC_UCODE_ADDR, 0);

        si_enable_lbpw(rdev, si_lbpw_supported(rdev));

        si_rlc_start(rdev);

        return 0;
}
5548
5549 static void si_enable_interrupts(struct radeon_device *rdev)
5550 {
5551         u32 ih_cntl = RREG32(IH_CNTL);
5552         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5553
5554         ih_cntl |= ENABLE_INTR;
5555         ih_rb_cntl |= IH_RB_ENABLE;
5556         WREG32(IH_CNTL, ih_cntl);
5557         WREG32(IH_RB_CNTL, ih_rb_cntl);
5558         rdev->ih.enabled = true;
5559 }
5560
5561 static void si_disable_interrupts(struct radeon_device *rdev)
5562 {
5563         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5564         u32 ih_cntl = RREG32(IH_CNTL);
5565
5566         ih_rb_cntl &= ~IH_RB_ENABLE;
5567         ih_cntl &= ~ENABLE_INTR;
5568         WREG32(IH_RB_CNTL, ih_rb_cntl);
5569         WREG32(IH_CNTL, ih_cntl);
5570         /* set rptr, wptr to 0 */
5571         WREG32(IH_RB_RPTR, 0);
5572         WREG32(IH_RB_WPTR, 0);
5573         rdev->ih.enabled = false;
5574         rdev->ih.rptr = 0;
5575 }
5576
/**
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon_device pointer
 *
 * Clears the interrupt enables for the CP rings, both DMA engines,
 * GRBM, each CRTC (vblank and pageflip), and the HPD pins, preserving
 * only the bits that are not interrupt enables.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* keep only the context busy/empty bits on ring 0 */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        WREG32(CP_INT_CNTL_RING1, 0);
        WREG32(CP_INT_CNTL_RING2, 0);
        /* mask the DMA trap interrupts on both engines */
        tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
        WREG32(GRBM_INT_CNTL, 0);
        /* per-crtc interrupt masks (vblank etc.) */
        if (rdev->num_crtc >= 2) {
                WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* per-crtc pageflip (graphics surface) interrupts */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* HPD pins: preserve only the interrupt polarity bit */
        if (!ASIC_IS_NODCE(rdev)) {
                WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

                tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD1_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD2_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD3_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD4_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD5_INT_CONTROL, tmp);
                tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
                WREG32(DC_HPD6_INT_CONTROL, tmp);
        }
}
5634
/**
 * si_irq_init - allocate and program the interrupt handler ring
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, loads the RLC, programs the IH ring buffer
 * location, size and writeback, masks all interrupt sources, then
 * enables the IH block.  Returns 0 on success, negative error code
 * if the ring allocation or RLC init fails.
 */
static int si_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        si_disable_interrupts(rdev);

        /* init rlc */
        ret = si_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* set dummy read address to ring address */
        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* ring size field is log2 of the size in dwords */
        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        si_disable_interrupt_state(rdev);

        pci_set_master(rdev->pdev);

        /* enable irqs */
        si_enable_interrupts(rdev);

        return ret;
}
5705
5706 int si_irq_set(struct radeon_device *rdev)
5707 {
5708         u32 cp_int_cntl;
5709         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5710         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5711         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5712         u32 grbm_int_cntl = 0;
5713         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5714         u32 dma_cntl, dma_cntl1;
5715         u32 thermal_int = 0;
5716
5717         if (!rdev->irq.installed) {
5718                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5719                 return -EINVAL;
5720         }
5721         /* don't enable anything if the ih is disabled */
5722         if (!rdev->ih.enabled) {
5723                 si_disable_interrupts(rdev);
5724                 /* force the active interrupt state to all disabled */
5725                 si_disable_interrupt_state(rdev);
5726                 return 0;
5727         }
5728
5729         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5730                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5731
5732         if (!ASIC_IS_NODCE(rdev)) {
5733                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5734                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5735                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5736                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5737                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5738                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5739         }
5740
5741         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5742         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5743
5744         thermal_int = RREG32(CG_THERMAL_INT) &
5745                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5746
5747         /* enable CP interrupts on all rings */
5748         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5749                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5750                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5751         }
5752         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5753                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5754                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5755         }
5756         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5757                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5758                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5759         }
5760         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5761                 DRM_DEBUG("si_irq_set: sw int dma\n");
5762                 dma_cntl |= TRAP_ENABLE;
5763         }
5764
5765         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5766                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5767                 dma_cntl1 |= TRAP_ENABLE;
5768         }
5769         if (rdev->irq.crtc_vblank_int[0] ||
5770             atomic_read(&rdev->irq.pflip[0])) {
5771                 DRM_DEBUG("si_irq_set: vblank 0\n");
5772                 crtc1 |= VBLANK_INT_MASK;
5773         }
5774         if (rdev->irq.crtc_vblank_int[1] ||
5775             atomic_read(&rdev->irq.pflip[1])) {
5776                 DRM_DEBUG("si_irq_set: vblank 1\n");
5777                 crtc2 |= VBLANK_INT_MASK;
5778         }
5779         if (rdev->irq.crtc_vblank_int[2] ||
5780             atomic_read(&rdev->irq.pflip[2])) {
5781                 DRM_DEBUG("si_irq_set: vblank 2\n");
5782                 crtc3 |= VBLANK_INT_MASK;
5783         }
5784         if (rdev->irq.crtc_vblank_int[3] ||
5785             atomic_read(&rdev->irq.pflip[3])) {
5786                 DRM_DEBUG("si_irq_set: vblank 3\n");
5787                 crtc4 |= VBLANK_INT_MASK;
5788         }
5789         if (rdev->irq.crtc_vblank_int[4] ||
5790             atomic_read(&rdev->irq.pflip[4])) {
5791                 DRM_DEBUG("si_irq_set: vblank 4\n");
5792                 crtc5 |= VBLANK_INT_MASK;
5793         }
5794         if (rdev->irq.crtc_vblank_int[5] ||
5795             atomic_read(&rdev->irq.pflip[5])) {
5796                 DRM_DEBUG("si_irq_set: vblank 5\n");
5797                 crtc6 |= VBLANK_INT_MASK;
5798         }
5799         if (rdev->irq.hpd[0]) {
5800                 DRM_DEBUG("si_irq_set: hpd 1\n");
5801                 hpd1 |= DC_HPDx_INT_EN;
5802         }
5803         if (rdev->irq.hpd[1]) {
5804                 DRM_DEBUG("si_irq_set: hpd 2\n");
5805                 hpd2 |= DC_HPDx_INT_EN;
5806         }
5807         if (rdev->irq.hpd[2]) {
5808                 DRM_DEBUG("si_irq_set: hpd 3\n");
5809                 hpd3 |= DC_HPDx_INT_EN;
5810         }
5811         if (rdev->irq.hpd[3]) {
5812                 DRM_DEBUG("si_irq_set: hpd 4\n");
5813                 hpd4 |= DC_HPDx_INT_EN;
5814         }
5815         if (rdev->irq.hpd[4]) {
5816                 DRM_DEBUG("si_irq_set: hpd 5\n");
5817                 hpd5 |= DC_HPDx_INT_EN;
5818         }
5819         if (rdev->irq.hpd[5]) {
5820                 DRM_DEBUG("si_irq_set: hpd 6\n");
5821                 hpd6 |= DC_HPDx_INT_EN;
5822         }
5823
5824         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5825         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5826         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5827
5828         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5829         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5830
5831         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5832
5833         if (rdev->irq.dpm_thermal) {
5834                 DRM_DEBUG("dpm thermal\n");
5835                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5836         }
5837
5838         if (rdev->num_crtc >= 2) {
5839                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5840                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5841         }
5842         if (rdev->num_crtc >= 4) {
5843                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5844                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5845         }
5846         if (rdev->num_crtc >= 6) {
5847                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5848                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5849         }
5850
5851         if (rdev->num_crtc >= 2) {
5852                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5853                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5854         }
5855         if (rdev->num_crtc >= 4) {
5856                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5857                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5858         }
5859         if (rdev->num_crtc >= 6) {
5860                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5861                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5862         }
5863
5864         if (!ASIC_IS_NODCE(rdev)) {
5865                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5866                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5867                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5868                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5869                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5870                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5871         }
5872
5873         WREG32(CG_THERMAL_INT, thermal_int);
5874
5875         return 0;
5876 }
5877
5878 static inline void si_irq_ack(struct radeon_device *rdev)
5879 {
5880         u32 tmp;
5881
5882         if (ASIC_IS_NODCE(rdev))
5883                 return;
5884
5885         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5886         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5887         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5888         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5889         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5890         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5891         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5892         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5893         if (rdev->num_crtc >= 4) {
5894                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5895                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5896         }
5897         if (rdev->num_crtc >= 6) {
5898                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5899                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5900         }
5901
5902         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5903                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5904         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5905                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5906         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5907                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5908         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5909                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5910         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5911                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5912         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5913                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5914
5915         if (rdev->num_crtc >= 4) {
5916                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5917                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5918                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5919                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5920                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5921                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5922                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5923                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5924                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5925                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5926                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5927                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5928         }
5929
5930         if (rdev->num_crtc >= 6) {
5931                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5932                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5933                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5934                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5935                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5936                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5937                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5938                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5939                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5940                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5941                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5942                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5943         }
5944
5945         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5946                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5947                 tmp |= DC_HPDx_INT_ACK;
5948                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5949         }
5950         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5951                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5952                 tmp |= DC_HPDx_INT_ACK;
5953                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5954         }
5955         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5956                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5957                 tmp |= DC_HPDx_INT_ACK;
5958                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5959         }
5960         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5961                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5962                 tmp |= DC_HPDx_INT_ACK;
5963                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5964         }
5965         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5966                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5967                 tmp |= DC_HPDx_INT_ACK;
5968                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5969         }
5970         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5971                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5972                 tmp |= DC_HPDx_INT_ACK;
5973                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5974         }
5975 }
5976
/* Disable interrupt delivery, then drain and ack anything already
 * latched.  Order matters: the IH is turned off first, we wait 1 ms
 * for in-flight interrupts to land, ack them, and finally clear the
 * per-source enable state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5985
/* Suspend-time interrupt teardown: quiesce the IH first, then stop
 * the RLC so no new interrupt sources fire while suspended.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5991
/* Final interrupt teardown: suspend (disable + RLC stop) and then
 * free the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5997
5998 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5999 {
6000         u32 wptr, tmp;
6001
6002         if (rdev->wb.enabled)
6003                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6004         else
6005                 wptr = RREG32(IH_RB_WPTR);
6006
6007         if (wptr & RB_OVERFLOW) {
6008                 /* When a ring buffer overflow happen start parsing interrupt
6009                  * from the last not overwritten vector (wptr + 16). Hopefully
6010                  * this should allow us to catchup.
6011                  */
6012                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6013                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6014                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6015                 tmp = RREG32(IH_RB_CNTL);
6016                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6017                 WREG32(IH_RB_CNTL, tmp);
6018         }
6019         return (wptr & rdev->ih.ptr_mask);
6020 }
6021
6022 /*        SI IV Ring
6023  * Each IV ring entry is 128 bits:
6024  * [7:0]    - interrupt source id
6025  * [31:8]   - reserved
6026  * [59:32]  - interrupt source data
6027  * [63:60]  - reserved
6028  * [71:64]  - RINGID
6029  * [79:72]  - VMID
6030  * [127:80] - reserved
6031  */
/**
 * si_irq_process - walk the IH ring and dispatch pending interrupts
 * @rdev: radeon device
 *
 * Single-consumer IH ring walk: takes rdev->ih.lock via atomic_xchg,
 * acks display interrupts (caching their status bits), then decodes
 * each 16-byte IV ring entry (source id / source data / ring id, see
 * the layout comment above) and dispatches: vblank/vline per CRTC,
 * hotplug, VM protection faults, CP/DMA fences, and thermal events.
 * Deferred work (hotplug, thermal) is scheduled after the ring is
 * drained.  Re-checks the write pointer after releasing the lock and
 * restarts if more entries arrived while processing.
 *
 * Returns IRQ_HANDLED, or IRQ_NONE when disabled/shut down or when
 * another context already holds the IH lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection fault (read) */
		case 147: /* VM protection fault (write) */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* ring_id selects which CP ring's fences to process */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6355
6356 /*
6357  * startup/shutdown callbacks
6358  */
/**
 * si_startup - bring the asic up to a working state
 *
 * @rdev: radeon_device pointer
 *
 * Performs the full hardware bring-up sequence used by both si_init()
 * and si_resume(): PCIe link/ASPM setup, VRAM scratch, MC programming,
 * firmware load, GART enable, RLC/writeback buffers, fence rings, IRQs,
 * CP/DMA/UVD ring init, IB pool, VM manager and audio.  The order of
 * these steps is significant (e.g. scratch before MC, fences before
 * rings) and must not be changed casually.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        int r;

        /* enable pcie gen2/3 link */
        si_pcie_gen3_enable(rdev);
        /* enable aspm */
        si_program_aspm(rdev);

        /* scratch needs to be initialized before MC */
        r = r600_vram_scratch_init(rdev);
        if (r)
                return r;

        si_mc_program(rdev);

        /* load microcode images if any are missing (first start only;
         * on resume they are already cached in rdev)
         */
        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
            !rdev->rlc_fw || !rdev->mc_fw) {
                r = si_init_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load firmware!\n");
                        return r;
                }
        }

        r = si_mc_load_microcode(rdev);
        if (r) {
                DRM_ERROR("Failed to load MC firmware!\n");
                return r;
        }

        r = si_pcie_gart_enable(rdev);
        if (r)
                return r;
        si_gpu_init(rdev);

        /* allocate rlc buffers */
        if (rdev->family == CHIP_VERDE) {
                rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
                rdev->rlc.reg_list_size =
                        (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
        }
        rdev->rlc.cs_data = si_cs_data;
        r = sumo_rlc_init(rdev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        /* allocate wb buffer */
        r = radeon_wb_init(rdev);
        if (r)
                return r;

        /* start fence processing for each of the five SI rings
         * (1 GFX + 2 compute CP rings, 2 DMA rings)
         */
        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        /* UVD failure is non-fatal: clearing ring_size below disables the
         * UVD ring instead of failing the whole startup
         */
        if (rdev->has_uvd) {
                r = uvd_v2_2_resume(rdev);
                if (!r) {
                        r = radeon_fence_driver_start_ring(rdev,
                                                           R600_RING_TYPE_UVD_INDEX);
                        if (r)
                                dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
                }
                if (r)
                        rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
        }

        /* Enable IRQ */
        if (!rdev->irq.installed) {
                r = radeon_irq_kms_init(rdev);
                if (r)
                        return r;
        }

        r = si_irq_init(rdev);
        if (r) {
                DRM_ERROR("radeon: IH init failed (%d).\n", r);
                radeon_irq_kms_fini(rdev);
                return r;
        }
        si_irq_set(rdev);

        /* hook each ring up to its rptr/wptr registers and wb offset */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             CP_RB0_RPTR, CP_RB0_WPTR,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
                             CP_RB1_RPTR, CP_RB1_WPTR,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
                             CP_RB2_RPTR, CP_RB2_WPTR,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
                             DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
                             DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;

        r = si_cp_load_microcode(rdev);
        if (r)
                return r;
        r = si_cp_resume(rdev);
        if (r)
                return r;

        r = cayman_dma_resume(rdev);
        if (r)
                return r;

        /* ring_size == 0 means UVD resume failed above; skip it */
        if (rdev->has_uvd) {
                ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
                if (ring->ring_size) {
                        r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
                                             UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
                                             RADEON_CP_PACKET2);
                        if (!r)
                                r = uvd_v1_0_init(rdev);
                        if (r)
                                DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
                }
        }

        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
                return r;
        }

        r = radeon_vm_manager_init(rdev);
        if (r) {
                dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
                return r;
        }

        r = dce6_audio_init(rdev);
        if (r)
                return r;

        return 0;
}
6550
6551 int si_resume(struct radeon_device *rdev)
6552 {
6553         int r;
6554
6555         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6556          * posting will perform necessary task to bring back GPU into good
6557          * shape.
6558          */
6559         /* post card */
6560         atom_asic_init(rdev->mode_info.atom_context);
6561
6562         /* init golden registers */
6563         si_init_golden_registers(rdev);
6564
6565         rdev->accel_working = true;
6566         r = si_startup(rdev);
6567         if (r) {
6568                 DRM_ERROR("si startup failed on resume\n");
6569                 rdev->accel_working = false;
6570                 return r;
6571         }
6572
6573         return r;
6574
6575 }
6576
/**
 * si_suspend - quiesce the asic for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops engines and disables hardware features in roughly the reverse
 * order of si_startup(): audio, VM manager, CP, DMA, UVD, power/clock
 * gating, interrupts, writeback and finally the GART.  The teardown
 * order matters; do not reorder these calls.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
        dce6_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        si_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
        if (rdev->has_uvd) {
                uvd_v1_0_fini(rdev);
                radeon_uvd_suspend(rdev);
        }
        si_fini_pg(rdev);
        si_fini_cg(rdev);
        si_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        si_pcie_gart_disable(rdev);
        return 0;
}
6594
6595 /* Plan is to move initialization in that function and use
6596  * helper function so that radeon_device_init pretty much
6597  * do nothing more than calling asic specific function. This
6598  * should also allow to remove a bunch of callback function
6599  * like vram_info.
6600  */
6601 int si_init(struct radeon_device *rdev)
6602 {
6603         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6604         int r;
6605
6606         /* Read BIOS */
6607         if (!radeon_get_bios(rdev)) {
6608                 if (ASIC_IS_AVIVO(rdev))
6609                         return -EINVAL;
6610         }
6611         /* Must be an ATOMBIOS */
6612         if (!rdev->is_atom_bios) {
6613                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6614                 return -EINVAL;
6615         }
6616         r = radeon_atombios_init(rdev);
6617         if (r)
6618                 return r;
6619
6620         /* Post card if necessary */
6621         if (!radeon_card_posted(rdev)) {
6622                 if (!rdev->bios) {
6623                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6624                         return -EINVAL;
6625                 }
6626                 DRM_INFO("GPU not posted. posting now...\n");
6627                 atom_asic_init(rdev->mode_info.atom_context);
6628         }
6629         /* init golden registers */
6630         si_init_golden_registers(rdev);
6631         /* Initialize scratch registers */
6632         si_scratch_init(rdev);
6633         /* Initialize surface registers */
6634         radeon_surface_init(rdev);
6635         /* Initialize clocks */
6636         radeon_get_clock_info(rdev->ddev);
6637
6638         /* Fence driver */
6639         r = radeon_fence_driver_init(rdev);
6640         if (r)
6641                 return r;
6642
6643         /* initialize memory controller */
6644         r = si_mc_init(rdev);
6645         if (r)
6646                 return r;
6647         /* Memory manager */
6648         r = radeon_bo_init(rdev);
6649         if (r)
6650                 return r;
6651
6652         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6653         ring->ring_obj = NULL;
6654         r600_ring_init(rdev, ring, 1024 * 1024);
6655
6656         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6657         ring->ring_obj = NULL;
6658         r600_ring_init(rdev, ring, 1024 * 1024);
6659
6660         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6661         ring->ring_obj = NULL;
6662         r600_ring_init(rdev, ring, 1024 * 1024);
6663
6664         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6665         ring->ring_obj = NULL;
6666         r600_ring_init(rdev, ring, 64 * 1024);
6667
6668         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6669         ring->ring_obj = NULL;
6670         r600_ring_init(rdev, ring, 64 * 1024);
6671
6672         if (rdev->has_uvd) {
6673                 r = radeon_uvd_init(rdev);
6674                 if (!r) {
6675                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6676                         ring->ring_obj = NULL;
6677                         r600_ring_init(rdev, ring, 4096);
6678                 }
6679         }
6680
6681         rdev->ih.ring_obj = NULL;
6682         r600_ih_ring_init(rdev, 64 * 1024);
6683
6684         r = r600_pcie_gart_init(rdev);
6685         if (r)
6686                 return r;
6687
6688         rdev->accel_working = true;
6689         r = si_startup(rdev);
6690         if (r) {
6691                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6692                 si_cp_fini(rdev);
6693                 cayman_dma_fini(rdev);
6694                 si_irq_fini(rdev);
6695                 sumo_rlc_fini(rdev);
6696                 radeon_wb_fini(rdev);
6697                 radeon_ib_pool_fini(rdev);
6698                 radeon_vm_manager_fini(rdev);
6699                 radeon_irq_kms_fini(rdev);
6700                 si_pcie_gart_fini(rdev);
6701                 rdev->accel_working = false;
6702         }
6703
6704         /* Don't start up if the MC ucode is missing.
6705          * The default clocks and voltages before the MC ucode
6706          * is loaded are not suffient for advanced operations.
6707          */
6708         if (!rdev->mc_fw) {
6709                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6710                 return -EINVAL;
6711         }
6712
6713         return 0;
6714 }
6715
/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything set up by si_init()/si_startup(): engines and
 * IRQs first, then the memory-management pieces (GART, scratch, GEM,
 * fences, buffer objects), then atombios state and the cached BIOS
 * image.  The ordering mirrors the init path and must be preserved.
 */
void si_fini(struct radeon_device *rdev)
{
        si_cp_fini(rdev);
        cayman_dma_fini(rdev);
        si_fini_pg(rdev);
        si_fini_cg(rdev);
        si_irq_fini(rdev);
        sumo_rlc_fini(rdev);
        radeon_wb_fini(rdev);
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
        if (rdev->has_uvd) {
                uvd_v1_0_fini(rdev);
                radeon_uvd_fini(rdev);
        }
        si_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
        radeon_fence_driver_fini(rdev);
        radeon_bo_fini(rdev);
        radeon_atombios_fini(rdev);
        /* free the BIOS copy made at init time and clear the pointer so
         * a stale reference cannot be used after teardown
         */
        kfree(rdev->bios);
        rdev->bios = NULL;
}
6741
6742 /**
6743  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6744  *
6745  * @rdev: radeon_device pointer
6746  *
6747  * Fetches a GPU clock counter snapshot (SI).
6748  * Returns the 64 bit clock counter snapshot.
6749  */
6750 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6751 {
6752         uint64_t clock;
6753
6754         mutex_lock(&rdev->gpu_clock_mutex);
6755         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6756         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6757                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6758         mutex_unlock(&rdev->gpu_clock_mutex);
6759         return clock;
6760 }
6761
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock; 0 (with @dclk == 0) puts the PLL to sleep
 * @dclk: requested UVD decoder clock; 0 (with @vclk == 0) puts the PLL to sleep
 *
 * Switches VCLK/DCLK to the bypass source, reprograms the UPLL dividers
 * computed by radeon_uvd_calc_upll_dividers(), and switches back to the
 * PLL output.  The write/delay sequence follows the hardware programming
 * requirements and must not be reordered.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
        unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
        int r;

        /* bypass vclk and dclk with bclk */
        WREG32_P(CG_UPLL_FUNC_CNTL_2,
                VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
                ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

        /* put PLL in bypass mode */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

        if (!vclk || !dclk) {
                /* keep the Bypass mode, put PLL to sleep */
                WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
                return 0;
        }

        /* compute feedback and post dividers for the requested clocks;
         * the numeric limits here are the UPLL constraints passed to the
         * shared helper (presumably VCO range and divider bounds — see
         * radeon_uvd_calc_upll_dividers for their exact meaning)
         */
        r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
                                          16384, 0x03FFFFFF, 0, 128, 5,
                                          &fb_div, &vclk_div, &dclk_div);
        if (r)
                return r;

        /* set RESET_ANTI_MUX to 0 */
        WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

        /* set VCO_MODE to 1 */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

        /* toggle UPLL_SLEEP to 1 then back to 0 */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

        /* deassert UPLL_RESET */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

        mdelay(1);

        /* handshake with the SMC before touching the dividers */
        r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
        if (r)
                return r;

        /* assert UPLL_RESET again */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

        /* disable spread spectrum. */
        WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

        /* set feedback divider */
        WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

        /* set ref divider to 0 */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

        /* ISPARE9 selection depends on the feedback divider magnitude */
        if (fb_div < 307200)
                WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
        else
                WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

        /* set PDIV_A and PDIV_B */
        WREG32_P(CG_UPLL_FUNC_CNTL_2,
                UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
                ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

        /* give the PLL some time to settle */
        mdelay(15);

        /* deassert PLL_RESET */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

        mdelay(15);

        /* switch from bypass mode to normal mode */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

        r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
        if (r)
                return r;

        /* switch VCLK and DCLK selection */
        WREG32_P(CG_UPLL_FUNC_CNTL_2,
                VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
                ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

        mdelay(100);

        return 0;
}
6852
/**
 * si_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the GPU and the upstream bridge support gen2 or gen3 link
 * speeds (and radeon.pcie_gen2 is not 0), reprograms the link: for
 * gen3 it first re-runs link equalization via the bridge and GPU
 * LNKCTL/LNKCTL2 registers, then forces a software speed change and
 * waits for it to complete.  No-op for IGP and non-PCIE parts.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
        struct pci_dev *root = rdev->pdev->bus->self;
        int bridge_pos, gpu_pos;
        u32 speed_cntl, mask, current_data_rate;
        int ret, i;
        u16 tmp16;

        /* radeon.pcie_gen2=0 disables all link speed changes */
        if (radeon_pcie_gen2 == 0)
                return;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* mask of speeds supported by both the GPU and the platform */
        ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
        if (ret != 0)
                return;

        if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
                return;

        /* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
                LC_CURRENT_DATA_RATE_SHIFT;
        if (mask & DRM_PCIE_SPEED_80) {
                if (current_data_rate == 2) {
                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
        } else if (mask & DRM_PCIE_SPEED_50) {
                if (current_data_rate == 1) {
                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
        }

        /* need the PCIe capability offsets of both link partners */
        bridge_pos = pci_pcie_cap(root);
        if (!bridge_pos)
                return;

        gpu_pos = pci_pcie_cap(rdev->pdev);
        if (!gpu_pos)
                return;

        if (mask & DRM_PCIE_SPEED_80) {
                /* re-try equalization if gen3 is not already enabled */
                if (current_data_rate != 2) {
                        u16 bridge_cfg, gpu_cfg;
                        u16 bridge_cfg2, gpu_cfg2;
                        u32 max_lw, current_lw, tmp;

                        /* save the HAWD bits so they can be restored after
                         * each equalization retry below
                         */
                        pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                        tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp = RREG32_PCIE(PCIE_LC_STATUS1);
                        max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
                        current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

                        /* widen the link back up if it trained narrower
                         * than what was detected
                         */
                        if (current_lw < max_lw) {
                                tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                                if (tmp & LC_RENEGOTIATION_SUPPORT) {
                                        tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
                                        tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
                                        tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
                                        WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
                                }
                        }

                        /* up to 10 equalization attempts; stop early once no
                         * transaction is pending on the GPU
                         */
                        for (i = 0; i < 10; i++) {
                                /* check status */
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
                                if (tmp16 & PCI_EXP_DEVSTA_TRPND)
                                        break;

                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_REDO_EQ;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                mdelay(100);

                                /* linkctl */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                                /* linkctl2 */
                                /* NOTE(review): the raw mask (1 << 4) | (7 << 9)
                                 * looks intended to cover the Enter Compliance /
                                 * Transmit Margin LNKCTL2 fields — verify against
                                 * the PCI_EXP_LNKCTL2_* bit definitions
                                 */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp &= ~LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
                        }
                }
        }

        /* set the link speed */
        speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
        speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* program the target link speed (LNKCTL2 bits 3:0) on the GPU */
        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
        tmp16 &= ~0xf;
        if (mask & DRM_PCIE_SPEED_80)
                tmp16 |= 3; /* gen3 */
        else if (mask & DRM_PCIE_SPEED_50)
                tmp16 |= 2; /* gen2 */
        else
                tmp16 |= 1; /* gen1 */
        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

        /* kick off the speed change ... */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* ... and poll until the hardware clears the initiate bit */
        for (i = 0; i < rdev->usec_timeout; i++) {
                speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
                if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
                        break;
                udelay(1);
        }
}
7009
7010 static void si_program_aspm(struct radeon_device *rdev)
7011 {
7012         u32 data, orig;
7013         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7014         bool disable_clkreq = false;
7015
7016         if (radeon_aspm == 0)
7017                 return;
7018
7019         if (!(rdev->flags & RADEON_IS_PCIE))
7020                 return;
7021
7022         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7023         data &= ~LC_XMIT_N_FTS_MASK;
7024         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7025         if (orig != data)
7026                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7027
7028         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7029         data |= LC_GO_TO_RECOVERY;
7030         if (orig != data)
7031                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7032
7033         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7034         data |= P_IGNORE_EDB_ERR;
7035         if (orig != data)
7036                 WREG32_PCIE(PCIE_P_CNTL, data);
7037
7038         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7039         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7040         data |= LC_PMI_TO_L1_DIS;
7041         if (!disable_l0s)
7042                 data |= LC_L0S_INACTIVITY(7);
7043
7044         if (!disable_l1) {
7045                 data |= LC_L1_INACTIVITY(7);
7046                 data &= ~LC_PMI_TO_L1_DIS;
7047                 if (orig != data)
7048                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7049
7050                 if (!disable_plloff_in_l1) {
7051                         bool clk_req_support;
7052
7053                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7054                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7055                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7056                         if (orig != data)
7057                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7058
7059                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7060                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7061                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7062                         if (orig != data)
7063                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7064
7065                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7066                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7067                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7068                         if (orig != data)
7069                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7070
7071                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7072                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7073                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7074                         if (orig != data)
7075                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7076
7077                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7078                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7079                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7080                                 if (orig != data)
7081                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7082
7083                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7084                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7085                                 if (orig != data)
7086                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7087
7088                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7089                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7090                                 if (orig != data)
7091                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7092
7093                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7094                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7095                                 if (orig != data)
7096                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7097
7098                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7099                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7100                                 if (orig != data)
7101                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7102
7103                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7104                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7105                                 if (orig != data)
7106                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7107
7108                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7109                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7110                                 if (orig != data)
7111                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7112
7113                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7114                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7115                                 if (orig != data)
7116                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7117                         }
7118                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7119                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7120                         data |= LC_DYN_LANES_PWR_STATE(3);
7121                         if (orig != data)
7122                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7123
7124                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7125                         data &= ~LS2_EXIT_TIME_MASK;
7126                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7127                                 data |= LS2_EXIT_TIME(5);
7128                         if (orig != data)
7129                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7130
7131                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7132                         data &= ~LS2_EXIT_TIME_MASK;
7133                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7134                                 data |= LS2_EXIT_TIME(5);
7135                         if (orig != data)
7136                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7137
7138                         if (!disable_clkreq) {
7139                                 struct pci_dev *root = rdev->pdev->bus->self;
7140                                 u32 lnkcap;
7141
7142                                 clk_req_support = false;
7143                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7144                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7145                                         clk_req_support = true;
7146                         } else {
7147                                 clk_req_support = false;
7148                         }
7149
7150                         if (clk_req_support) {
7151                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7152                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7153                                 if (orig != data)
7154                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7155
7156                                 orig = data = RREG32(THM_CLK_CNTL);
7157                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7158                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7159                                 if (orig != data)
7160                                         WREG32(THM_CLK_CNTL, data);
7161
7162                                 orig = data = RREG32(MISC_CLK_CNTL);
7163                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7164                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7165                                 if (orig != data)
7166                                         WREG32(MISC_CLK_CNTL, data);
7167
7168                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7169                                 data &= ~BCLK_AS_XCLK;
7170                                 if (orig != data)
7171                                         WREG32(CG_CLKPIN_CNTL, data);
7172
7173                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7174                                 data &= ~FORCE_BIF_REFCLK_EN;
7175                                 if (orig != data)
7176                                         WREG32(CG_CLKPIN_CNTL_2, data);
7177
7178                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7179                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7180                                 data |= MPLL_CLKOUT_SEL(4);
7181                                 if (orig != data)
7182                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7183
7184                                 orig = data = RREG32(SPLL_CNTL_MODE);
7185                                 data &= ~SPLL_REFCLK_SEL_MASK;
7186                                 if (orig != data)
7187                                         WREG32(SPLL_CNTL_MODE, data);
7188                         }
7189                 }
7190         } else {
7191                 if (orig != data)
7192                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7193         }
7194
7195         orig = data = RREG32_PCIE(PCIE_CNTL2);
7196         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7197         if (orig != data)
7198                 WREG32_PCIE(PCIE_CNTL2, data);
7199
7200         if (!disable_l0s) {
7201                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7202                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7203                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7204                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7205                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7206                                 data &= ~LC_L0S_INACTIVITY_MASK;
7207                                 if (orig != data)
7208                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7209                         }
7210                 }
7211         }
7212 }