/* drivers/gpu/drm/radeon/si.c (from karo-tx-linux.git; gitweb page header removed) */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
/*
 * Firmware images this module may request at runtime, one set per
 * supported SI ASIC family (Tahiti, Pitcairn, Verde, Oland, Hainan).
 * Each family ships pfp/me/ce (command-processor), mc (memory
 * controller), rlc and smc images.  MODULE_FIRMWARE() records the
 * names in the module info so userspace can pre-load them.
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
/*
 * Forward declarations for file-local helpers, plus externs for
 * routines reused from the r600/evergreen code (declared here rather
 * than in a shared header at this point in the driver's history).
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
79
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82         (0x8000 << 16) | (0x98f4 >> 2),
83         0x00000000,
84         (0x8040 << 16) | (0x98f4 >> 2),
85         0x00000000,
86         (0x8000 << 16) | (0xe80 >> 2),
87         0x00000000,
88         (0x8040 << 16) | (0xe80 >> 2),
89         0x00000000,
90         (0x8000 << 16) | (0x89bc >> 2),
91         0x00000000,
92         (0x8040 << 16) | (0x89bc >> 2),
93         0x00000000,
94         (0x8000 << 16) | (0x8c1c >> 2),
95         0x00000000,
96         (0x8040 << 16) | (0x8c1c >> 2),
97         0x00000000,
98         (0x9c00 << 16) | (0x98f0 >> 2),
99         0x00000000,
100         (0x9c00 << 16) | (0xe7c >> 2),
101         0x00000000,
102         (0x8000 << 16) | (0x9148 >> 2),
103         0x00000000,
104         (0x8040 << 16) | (0x9148 >> 2),
105         0x00000000,
106         (0x9c00 << 16) | (0x9150 >> 2),
107         0x00000000,
108         (0x9c00 << 16) | (0x897c >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x8d8c >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0xac54 >> 2),
113         0X00000000,
114         0x3,
115         (0x9c00 << 16) | (0x98f8 >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x9910 >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0x9914 >> 2),
120         0x00000000,
121         (0x9c00 << 16) | (0x9918 >> 2),
122         0x00000000,
123         (0x9c00 << 16) | (0x991c >> 2),
124         0x00000000,
125         (0x9c00 << 16) | (0x9920 >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9924 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9928 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x992c >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9930 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9934 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9938 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x993c >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9940 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9944 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9948 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x994c >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9950 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9954 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9958 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x995c >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9960 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9964 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9968 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x996c >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9970 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9974 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9978 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x997c >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9980 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9984 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9988 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x998c >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x8c00 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x8c14 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x8c04 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x8c08 >> 2),
188         0x00000000,
189         (0x8000 << 16) | (0x9b7c >> 2),
190         0x00000000,
191         (0x8040 << 16) | (0x9b7c >> 2),
192         0x00000000,
193         (0x8000 << 16) | (0xe84 >> 2),
194         0x00000000,
195         (0x8040 << 16) | (0xe84 >> 2),
196         0x00000000,
197         (0x8000 << 16) | (0x89c0 >> 2),
198         0x00000000,
199         (0x8040 << 16) | (0x89c0 >> 2),
200         0x00000000,
201         (0x8000 << 16) | (0x914c >> 2),
202         0x00000000,
203         (0x8040 << 16) | (0x914c >> 2),
204         0x00000000,
205         (0x8000 << 16) | (0x8c20 >> 2),
206         0x00000000,
207         (0x8040 << 16) | (0x8c20 >> 2),
208         0x00000000,
209         (0x8000 << 16) | (0x9354 >> 2),
210         0x00000000,
211         (0x8040 << 16) | (0x9354 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x9060 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9364 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9100 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x913c >> 2),
220         0x00000000,
221         (0x8000 << 16) | (0x90e0 >> 2),
222         0x00000000,
223         (0x8000 << 16) | (0x90e4 >> 2),
224         0x00000000,
225         (0x8000 << 16) | (0x90e8 >> 2),
226         0x00000000,
227         (0x8040 << 16) | (0x90e0 >> 2),
228         0x00000000,
229         (0x8040 << 16) | (0x90e4 >> 2),
230         0x00000000,
231         (0x8040 << 16) | (0x90e8 >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x8bcc >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x8b24 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x88c4 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x8e50 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8c0c >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8e58 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8e5c >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x9508 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x950c >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x9494 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0xac0c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0xac10 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0xac14 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0xae00 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0xac08 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x88d4 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x88c8 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0x88cc >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x89b0 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x8b10 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x8a14 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x9830 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9834 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x9838 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x9a10 >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0x9870 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x9874 >> 2),
286         0x00000000,
287         (0x8001 << 16) | (0x9870 >> 2),
288         0x00000000,
289         (0x8001 << 16) | (0x9874 >> 2),
290         0x00000000,
291         (0x8040 << 16) | (0x9870 >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x9874 >> 2),
294         0x00000000,
295         (0x8041 << 16) | (0x9870 >> 2),
296         0x00000000,
297         (0x8041 << 16) | (0x9874 >> 2),
298         0x00000000,
299         0x00000000
300 };
301
/*
 * Tahiti "golden" RLC register settings.  Three u32s per entry:
 * register offset, mask, value -- presumably applied as
 * (reg, and-mask, or-value) by a register-sequence helper outside
 * this chunk; confirm against the SI init code.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
311
/*
 * Tahiti "golden" register settings.  Three u32s per entry:
 * register offset, mask, value -- presumably (reg, and-mask,
 * or-value) triples applied during ASIC init; confirm against the
 * register-sequence helper outside this chunk.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
346
/*
 * Second Tahiti "golden" register table (single entry), same
 * (offset, mask, value) layout as the other golden tables.
 */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
351
/*
 * Pitcairn "golden" RLC register settings; (offset, mask, value)
 * triples, same layout as the other golden tables in this file.
 */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
360
/*
 * Pitcairn "golden" register settings; (offset, mask, value)
 * triples, same layout as the other golden tables in this file.
 */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
391
/*
 * Verde "golden" RLC register settings; (offset, mask, value)
 * triples, same layout as the other golden tables in this file.
 */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
400
/*
 * Verde "golden" register settings; (offset, mask, value) triples,
 * same layout as the other golden tables in this file.
 *
 * NOTE(review): several entries (0xd030, 0xd830, 0x2ae4, 0x240c,
 * 0x8a14, 0x28350, 0x9100, 0x8e88, 0x8e84, 0xac14, 0xac10, 0xac0c,
 * 0x88d4) are repeated two or three times with identical values.
 * The duplicates are preserved exactly as in the original table --
 * whether they are intentional (e.g. per-instance writes) should be
 * confirmed against the code that consumes the table.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
456
/*
 * Oland "golden" RLC register settings; (offset, mask, value)
 * triples, same layout as the other golden tables in this file.
 */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
465
/*
 * Oland "golden" register settings; (offset, mask, value) triples,
 * same layout as the other golden tables in this file.
 */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
496
/*
 * Hainan "golden" register settings; (offset, mask, value) triples,
 * same layout as the other golden tables in this file.
 */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
525
/*
 * Second Hainan "golden" register table (single entry), same
 * (offset, mask, value) layout as the other golden tables.
 */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
530
/*
 * Tahiti medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * sequence.  Three u32s per entry: register offset, mask, value --
 * presumably (reg, and-mask, or-value) triples applied in order by a
 * register-sequence helper outside this chunk; confirm against the
 * SI clock-gating setup code.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
660
/*
 * Pitcairn medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * sequence.  Three u32s per entry: register offset, mask, value --
 * presumably (reg, and-mask, or-value) triples applied in order by a
 * register-sequence helper outside this chunk; confirm against the
 * SI clock-gating setup code.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
758
759 static const u32 verde_mgcg_cgcg_init[] =
760 {
761         0xc400, 0xffffffff, 0xfffffffc,
762         0x802c, 0xffffffff, 0xe0000000,
763         0x9a60, 0xffffffff, 0x00000100,
764         0x92a4, 0xffffffff, 0x00000100,
765         0xc164, 0xffffffff, 0x00000100,
766         0x9774, 0xffffffff, 0x00000100,
767         0x8984, 0xffffffff, 0x06000100,
768         0x8a18, 0xffffffff, 0x00000100,
769         0x92a0, 0xffffffff, 0x00000100,
770         0xc380, 0xffffffff, 0x00000100,
771         0x8b28, 0xffffffff, 0x00000100,
772         0x9144, 0xffffffff, 0x00000100,
773         0x8d88, 0xffffffff, 0x00000100,
774         0x8d8c, 0xffffffff, 0x00000100,
775         0x9030, 0xffffffff, 0x00000100,
776         0x9034, 0xffffffff, 0x00000100,
777         0x9038, 0xffffffff, 0x00000100,
778         0x903c, 0xffffffff, 0x00000100,
779         0xad80, 0xffffffff, 0x00000100,
780         0xac54, 0xffffffff, 0x00000100,
781         0x897c, 0xffffffff, 0x06000100,
782         0x9868, 0xffffffff, 0x00000100,
783         0x9510, 0xffffffff, 0x00000100,
784         0xaf04, 0xffffffff, 0x00000100,
785         0xae04, 0xffffffff, 0x00000100,
786         0x949c, 0xffffffff, 0x00000100,
787         0x802c, 0xffffffff, 0xe0000000,
788         0x9160, 0xffffffff, 0x00010000,
789         0x9164, 0xffffffff, 0x00030002,
790         0x9168, 0xffffffff, 0x00040007,
791         0x916c, 0xffffffff, 0x00060005,
792         0x9170, 0xffffffff, 0x00090008,
793         0x9174, 0xffffffff, 0x00020001,
794         0x9178, 0xffffffff, 0x00040003,
795         0x917c, 0xffffffff, 0x00000007,
796         0x9180, 0xffffffff, 0x00060005,
797         0x9184, 0xffffffff, 0x00090008,
798         0x9188, 0xffffffff, 0x00030002,
799         0x918c, 0xffffffff, 0x00050004,
800         0x9190, 0xffffffff, 0x00000008,
801         0x9194, 0xffffffff, 0x00070006,
802         0x9198, 0xffffffff, 0x000a0009,
803         0x919c, 0xffffffff, 0x00040003,
804         0x91a0, 0xffffffff, 0x00060005,
805         0x91a4, 0xffffffff, 0x00000009,
806         0x91a8, 0xffffffff, 0x00080007,
807         0x91ac, 0xffffffff, 0x000b000a,
808         0x91b0, 0xffffffff, 0x00050004,
809         0x91b4, 0xffffffff, 0x00070006,
810         0x91b8, 0xffffffff, 0x0008000b,
811         0x91bc, 0xffffffff, 0x000a0009,
812         0x91c0, 0xffffffff, 0x000d000c,
813         0x9200, 0xffffffff, 0x00090008,
814         0x9204, 0xffffffff, 0x000b000a,
815         0x9208, 0xffffffff, 0x000c000f,
816         0x920c, 0xffffffff, 0x000e000d,
817         0x9210, 0xffffffff, 0x00110010,
818         0x9214, 0xffffffff, 0x000a0009,
819         0x9218, 0xffffffff, 0x000c000b,
820         0x921c, 0xffffffff, 0x0000000f,
821         0x9220, 0xffffffff, 0x000e000d,
822         0x9224, 0xffffffff, 0x00110010,
823         0x9228, 0xffffffff, 0x000b000a,
824         0x922c, 0xffffffff, 0x000d000c,
825         0x9230, 0xffffffff, 0x00000010,
826         0x9234, 0xffffffff, 0x000f000e,
827         0x9238, 0xffffffff, 0x00120011,
828         0x923c, 0xffffffff, 0x000c000b,
829         0x9240, 0xffffffff, 0x000e000d,
830         0x9244, 0xffffffff, 0x00000011,
831         0x9248, 0xffffffff, 0x0010000f,
832         0x924c, 0xffffffff, 0x00130012,
833         0x9250, 0xffffffff, 0x000d000c,
834         0x9254, 0xffffffff, 0x000f000e,
835         0x9258, 0xffffffff, 0x00100013,
836         0x925c, 0xffffffff, 0x00120011,
837         0x9260, 0xffffffff, 0x00150014,
838         0x9150, 0xffffffff, 0x96940200,
839         0x8708, 0xffffffff, 0x00900100,
840         0xc478, 0xffffffff, 0x00000080,
841         0xc404, 0xffffffff, 0x0020003f,
842         0x30, 0xffffffff, 0x0000001c,
843         0x34, 0x000f0000, 0x000f0000,
844         0x160c, 0xffffffff, 0x00000100,
845         0x1024, 0xffffffff, 0x00000100,
846         0x102c, 0x00000101, 0x00000000,
847         0x20a8, 0xffffffff, 0x00000104,
848         0x264c, 0x000c0000, 0x000c0000,
849         0x2648, 0x000c0000, 0x000c0000,
850         0x55e4, 0xff000fff, 0x00000100,
851         0x55e8, 0x00000001, 0x00000001,
852         0x2f50, 0x00000001, 0x00000001,
853         0x30cc, 0xc0000fff, 0x00000104,
854         0xc1e4, 0x00000001, 0x00000001,
855         0xd0c0, 0xfffffff0, 0x00000100,
856         0xd8c0, 0xfffffff0, 0x00000100
857 };
858
/*
 * MGCG/CGCG (clock gating) register init sequence for Oland.
 * Flat list of u32 triples — presumably {offset, mask, value}; applied
 * via radeon_program_register_sequence() from si_init_golden_registers()
 * (confirm the triple layout against that function's definition).
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
938
/*
 * MGCG/CGCG (clock gating) register init sequence for Hainan.
 * Same triple layout as the other *_mgcg_cgcg_init tables; note it omits
 * a few entries present in the Oland table (0x102c, 0x55e4, 0x55e8).
 * Applied via radeon_program_register_sequence() from
 * si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1015
1016 static u32 verde_pg_init[] =
1017 {
1018         0x353c, 0xffffffff, 0x40000,
1019         0x3538, 0xffffffff, 0x200010ff,
1020         0x353c, 0xffffffff, 0x0,
1021         0x353c, 0xffffffff, 0x0,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x7007,
1026         0x3538, 0xffffffff, 0x300010ff,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x400000,
1033         0x3538, 0xffffffff, 0x100010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x120200,
1040         0x3538, 0xffffffff, 0x500010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x1e1e16,
1047         0x3538, 0xffffffff, 0x600010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x171f1e,
1054         0x3538, 0xffffffff, 0x700010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x3538, 0xffffffff, 0x9ff,
1062         0x3500, 0xffffffff, 0x0,
1063         0x3504, 0xffffffff, 0x10000800,
1064         0x3504, 0xffffffff, 0xf,
1065         0x3504, 0xffffffff, 0xf,
1066         0x3500, 0xffffffff, 0x4,
1067         0x3504, 0xffffffff, 0x1000051e,
1068         0x3504, 0xffffffff, 0xffff,
1069         0x3504, 0xffffffff, 0xffff,
1070         0x3500, 0xffffffff, 0x8,
1071         0x3504, 0xffffffff, 0x80500,
1072         0x3500, 0xffffffff, 0x12,
1073         0x3504, 0xffffffff, 0x9050c,
1074         0x3500, 0xffffffff, 0x1d,
1075         0x3504, 0xffffffff, 0xb052c,
1076         0x3500, 0xffffffff, 0x2a,
1077         0x3504, 0xffffffff, 0x1053e,
1078         0x3500, 0xffffffff, 0x2d,
1079         0x3504, 0xffffffff, 0x10546,
1080         0x3500, 0xffffffff, 0x30,
1081         0x3504, 0xffffffff, 0xa054e,
1082         0x3500, 0xffffffff, 0x3c,
1083         0x3504, 0xffffffff, 0x1055f,
1084         0x3500, 0xffffffff, 0x3f,
1085         0x3504, 0xffffffff, 0x10567,
1086         0x3500, 0xffffffff, 0x42,
1087         0x3504, 0xffffffff, 0x1056f,
1088         0x3500, 0xffffffff, 0x45,
1089         0x3504, 0xffffffff, 0x10572,
1090         0x3500, 0xffffffff, 0x48,
1091         0x3504, 0xffffffff, 0x20575,
1092         0x3500, 0xffffffff, 0x4c,
1093         0x3504, 0xffffffff, 0x190801,
1094         0x3500, 0xffffffff, 0x67,
1095         0x3504, 0xffffffff, 0x1082a,
1096         0x3500, 0xffffffff, 0x6a,
1097         0x3504, 0xffffffff, 0x1b082d,
1098         0x3500, 0xffffffff, 0x87,
1099         0x3504, 0xffffffff, 0x310851,
1100         0x3500, 0xffffffff, 0xba,
1101         0x3504, 0xffffffff, 0x891,
1102         0x3500, 0xffffffff, 0xbc,
1103         0x3504, 0xffffffff, 0x893,
1104         0x3500, 0xffffffff, 0xbe,
1105         0x3504, 0xffffffff, 0x20895,
1106         0x3500, 0xffffffff, 0xc2,
1107         0x3504, 0xffffffff, 0x20899,
1108         0x3500, 0xffffffff, 0xc6,
1109         0x3504, 0xffffffff, 0x2089d,
1110         0x3500, 0xffffffff, 0xca,
1111         0x3504, 0xffffffff, 0x8a1,
1112         0x3500, 0xffffffff, 0xcc,
1113         0x3504, 0xffffffff, 0x8a3,
1114         0x3500, 0xffffffff, 0xce,
1115         0x3504, 0xffffffff, 0x308a5,
1116         0x3500, 0xffffffff, 0xd3,
1117         0x3504, 0xffffffff, 0x6d08cd,
1118         0x3500, 0xffffffff, 0x142,
1119         0x3504, 0xffffffff, 0x2000095a,
1120         0x3504, 0xffffffff, 0x1,
1121         0x3500, 0xffffffff, 0x144,
1122         0x3504, 0xffffffff, 0x301f095b,
1123         0x3500, 0xffffffff, 0x165,
1124         0x3504, 0xffffffff, 0xc094d,
1125         0x3500, 0xffffffff, 0x173,
1126         0x3504, 0xffffffff, 0xf096d,
1127         0x3500, 0xffffffff, 0x184,
1128         0x3504, 0xffffffff, 0x15097f,
1129         0x3500, 0xffffffff, 0x19b,
1130         0x3504, 0xffffffff, 0xc0998,
1131         0x3500, 0xffffffff, 0x1a9,
1132         0x3504, 0xffffffff, 0x409a7,
1133         0x3500, 0xffffffff, 0x1af,
1134         0x3504, 0xffffffff, 0xcdc,
1135         0x3500, 0xffffffff, 0x1b1,
1136         0x3504, 0xffffffff, 0x800,
1137         0x3508, 0xffffffff, 0x6c9b2000,
1138         0x3510, 0xfc00, 0x2000,
1139         0x3544, 0xffffffff, 0xfc0,
1140         0x28d4, 0x00000100, 0x100
1141 };
1142
/**
 * si_init_golden_registers - program per-ASIC "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the chip-specific register init tables defined above (golden
 * registers, RLC registers, MGCG/CGCG clock-gating init and, for Verde,
 * the PG init sequence) via radeon_program_register_sequence().
 * Families other than the SI parts listed here are silently ignored.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		/* non-SI families: nothing to program here */
		break;
	}
}
1211
1212 #define PCIE_BUS_CLK                10000
1213 #define TCLK                        (PCIE_BUS_CLK / 10)
1214
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225         u32 reference_clock = rdev->clock.spll.reference_freq;
1226         u32 tmp;
1227
1228         tmp = RREG32(CG_CLKPIN_CNTL_2);
1229         if (tmp & MUX_TCLK_TO_XCLK)
1230                 return TCLK;
1231
1232         tmp = RREG32(CG_CLKPIN_CNTL);
1233         if (tmp & XTALIN_DIVIDE)
1234                 return reference_clock / 4;
1235
1236         return reference_clock;
1237 }
1238
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242         u32 temp;
1243         int actual_temp = 0;
1244
1245         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246                 CTF_TEMP_SHIFT;
1247
1248         if (temp & 0x200)
1249                 actual_temp = 255;
1250         else
1251                 actual_temp = temp & 0x1ff;
1252
1253         actual_temp = (actual_temp * 1000);
1254
1255         return actual_temp;
1256 }
1257
/* number of {index, data} pairs in each of the *_io_mc_regs tables below */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC IO debug register {index, data} pairs for Tahiti; each pair is
 * written to MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * si_mc_load_microcode() before the MC ucode is streamed in.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1298
/*
 * MC IO debug register {index, data} pairs for Pitcairn; consumed by
 * si_mc_load_microcode().  Identical to the Tahiti table except for
 * the final entry (index 0x9f).
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1337
/*
 * MC IO debug register {index, data} pairs for Verde; consumed by
 * si_mc_load_microcode().  Differs from the Tahiti table only in the
 * final entry (index 0x9f).
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1376
/*
 * MC IO debug register {index, data} pairs for Oland; consumed by
 * si_mc_load_microcode().  Differs from the Tahiti table only in the
 * final entry (index 0x9f).
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1415
/*
 * MC IO debug register {index, data} pairs for Hainan; consumed by
 * si_mc_load_microcode().  Differs from the Tahiti table only in the
 * final entry (index 0x9f).
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1454
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458         const __be32 *fw_data;
1459         u32 running, blackout = 0;
1460         u32 *io_mc_regs;
1461         int i, ucode_size, regs_size;
1462
1463         if (!rdev->mc_fw)
1464                 return -EINVAL;
1465
1466         switch (rdev->family) {
1467         case CHIP_TAHITI:
1468                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469                 ucode_size = SI_MC_UCODE_SIZE;
1470                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1471                 break;
1472         case CHIP_PITCAIRN:
1473                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474                 ucode_size = SI_MC_UCODE_SIZE;
1475                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1476                 break;
1477         case CHIP_VERDE:
1478         default:
1479                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1480                 ucode_size = SI_MC_UCODE_SIZE;
1481                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1482                 break;
1483         case CHIP_OLAND:
1484                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1485                 ucode_size = OLAND_MC_UCODE_SIZE;
1486                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1487                 break;
1488         case CHIP_HAINAN:
1489                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490                 ucode_size = OLAND_MC_UCODE_SIZE;
1491                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1492                 break;
1493         }
1494
1495         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496
1497         if (running == 0) {
1498                 if (running) {
1499                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501                 }
1502
1503                 /* reset the engine and set to writable */
1504                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506
1507                 /* load mc io regs */
1508                 for (i = 0; i < regs_size; i++) {
1509                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511                 }
1512                 /* load the MC ucode */
1513                 fw_data = (const __be32 *)rdev->mc_fw->data;
1514                 for (i = 0; i < ucode_size; i++)
1515                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516
1517                 /* put the engine back into the active state */
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521
1522                 /* wait for training to complete */
1523                 for (i = 0; i < rdev->usec_timeout; i++) {
1524                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525                                 break;
1526                         udelay(1);
1527                 }
1528                 for (i = 0; i < rdev->usec_timeout; i++) {
1529                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530                                 break;
1531                         udelay(1);
1532                 }
1533
1534                 if (running)
1535                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536         }
1537
1538         return 0;
1539 }
1540
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543         const char *chip_name;
1544         const char *rlc_chip_name;
1545         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1546         size_t smc_req_size;
1547         char fw_name[30];
1548         int err;
1549
1550         DRM_DEBUG("\n");
1551
1552         switch (rdev->family) {
1553         case CHIP_TAHITI:
1554                 chip_name = "TAHITI";
1555                 rlc_chip_name = "TAHITI";
1556                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1557                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1558                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1559                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1560                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1561                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1562                 break;
1563         case CHIP_PITCAIRN:
1564                 chip_name = "PITCAIRN";
1565                 rlc_chip_name = "PITCAIRN";
1566                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1567                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1568                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1569                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1570                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1571                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1572                 break;
1573         case CHIP_VERDE:
1574                 chip_name = "VERDE";
1575                 rlc_chip_name = "VERDE";
1576                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1577                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1578                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1579                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1580                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1581                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1582                 break;
1583         case CHIP_OLAND:
1584                 chip_name = "OLAND";
1585                 rlc_chip_name = "OLAND";
1586                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1587                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1588                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1589                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1590                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1591                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1592                 break;
1593         case CHIP_HAINAN:
1594                 chip_name = "HAINAN";
1595                 rlc_chip_name = "HAINAN";
1596                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1597                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1598                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1599                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1600                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1601                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1602                 break;
1603         default: BUG();
1604         }
1605
1606         DRM_INFO("Loading %s Microcode\n", chip_name);
1607
1608         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1609         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1610         if (err)
1611                 goto out;
1612         if (rdev->pfp_fw->size != pfp_req_size) {
1613                 printk(KERN_ERR
1614                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1615                        rdev->pfp_fw->size, fw_name);
1616                 err = -EINVAL;
1617                 goto out;
1618         }
1619
1620         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1621         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1622         if (err)
1623                 goto out;
1624         if (rdev->me_fw->size != me_req_size) {
1625                 printk(KERN_ERR
1626                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627                        rdev->me_fw->size, fw_name);
1628                 err = -EINVAL;
1629         }
1630
1631         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1632         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1633         if (err)
1634                 goto out;
1635         if (rdev->ce_fw->size != ce_req_size) {
1636                 printk(KERN_ERR
1637                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1638                        rdev->ce_fw->size, fw_name);
1639                 err = -EINVAL;
1640         }
1641
1642         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1643         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1644         if (err)
1645                 goto out;
1646         if (rdev->rlc_fw->size != rlc_req_size) {
1647                 printk(KERN_ERR
1648                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1649                        rdev->rlc_fw->size, fw_name);
1650                 err = -EINVAL;
1651         }
1652
1653         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1654         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1655         if (err)
1656                 goto out;
1657         if (rdev->mc_fw->size != mc_req_size) {
1658                 printk(KERN_ERR
1659                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1660                        rdev->mc_fw->size, fw_name);
1661                 err = -EINVAL;
1662         }
1663
1664         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1665         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1666         if (err) {
1667                 printk(KERN_ERR
1668                        "smc: error loading firmware \"%s\"\n",
1669                        fw_name);
1670                 release_firmware(rdev->smc_fw);
1671                 rdev->smc_fw = NULL;
1672         } else if (rdev->smc_fw->size != smc_req_size) {
1673                 printk(KERN_ERR
1674                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1675                        rdev->smc_fw->size, fw_name);
1676                 err = -EINVAL;
1677         }
1678
1679 out:
1680         if (err) {
1681                 if (err != -EINVAL)
1682                         printk(KERN_ERR
1683                                "si_cp: Failed to load firmware \"%s\"\n",
1684                                fw_name);
1685                 release_firmware(rdev->pfp_fw);
1686                 rdev->pfp_fw = NULL;
1687                 release_firmware(rdev->me_fw);
1688                 rdev->me_fw = NULL;
1689                 release_firmware(rdev->ce_fw);
1690                 rdev->ce_fw = NULL;
1691                 release_firmware(rdev->rlc_fw);
1692                 rdev->rlc_fw = NULL;
1693                 release_firmware(rdev->mc_fw);
1694                 rdev->mc_fw = NULL;
1695                 release_firmware(rdev->smc_fw);
1696                 rdev->smc_fw = NULL;
1697         }
1698         return err;
1699 }
1700
1701 /* watermark setup */
1702 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1703                                    struct radeon_crtc *radeon_crtc,
1704                                    struct drm_display_mode *mode,
1705                                    struct drm_display_mode *other_mode)
1706 {
1707         u32 tmp;
1708         /*
1709          * Line Buffer Setup
1710          * There are 3 line buffers, each one shared by 2 display controllers.
1711          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1712          * the display controllers.  The paritioning is done via one of four
1713          * preset allocations specified in bits 21:20:
1714          *  0 - half lb
1715          *  2 - whole lb, other crtc must be disabled
1716          */
1717         /* this can get tricky if we have two large displays on a paired group
1718          * of crtcs.  Ideally for multiple large displays we'd assign them to
1719          * non-linked crtcs for maximum line buffer allocation.
1720          */
1721         if (radeon_crtc->base.enabled && mode) {
1722                 if (other_mode)
1723                         tmp = 0; /* 1/2 */
1724                 else
1725                         tmp = 2; /* whole */
1726         } else
1727                 tmp = 0;
1728
1729         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1730                DC_LB_MEMORY_CONFIG(tmp));
1731
1732         if (radeon_crtc->base.enabled && mode) {
1733                 switch (tmp) {
1734                 case 0:
1735                 default:
1736                         return 4096 * 2;
1737                 case 2:
1738                         return 8192 * 2;
1739                 }
1740         }
1741
1742         /* controller not enabled, so no lb used */
1743         return 0;
1744 }
1745
1746 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1747 {
1748         u32 tmp = RREG32(MC_SHARED_CHMAP);
1749
1750         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1751         case 0:
1752         default:
1753                 return 1;
1754         case 1:
1755                 return 2;
1756         case 2:
1757                 return 4;
1758         case 3:
1759                 return 8;
1760         case 4:
1761                 return 3;
1762         case 5:
1763                 return 6;
1764         case 6:
1765                 return 10;
1766         case 7:
1767                 return 12;
1768         case 8:
1769                 return 16;
1770         }
1771 }
1772
/* Inputs to the DCE6 display watermark calculations, gathered per crtc
 * by dce6_program_watermarks().
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1788
1789 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1790 {
1791         /* Calculate raw DRAM Bandwidth */
1792         fixed20_12 dram_efficiency; /* 0.7 */
1793         fixed20_12 yclk, dram_channels, bandwidth;
1794         fixed20_12 a;
1795
1796         a.full = dfixed_const(1000);
1797         yclk.full = dfixed_const(wm->yclk);
1798         yclk.full = dfixed_div(yclk, a);
1799         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1800         a.full = dfixed_const(10);
1801         dram_efficiency.full = dfixed_const(7);
1802         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1803         bandwidth.full = dfixed_mul(dram_channels, yclk);
1804         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1805
1806         return dfixed_trunc(bandwidth);
1807 }
1808
1809 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1810 {
1811         /* Calculate DRAM Bandwidth and the part allocated to display. */
1812         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1813         fixed20_12 yclk, dram_channels, bandwidth;
1814         fixed20_12 a;
1815
1816         a.full = dfixed_const(1000);
1817         yclk.full = dfixed_const(wm->yclk);
1818         yclk.full = dfixed_div(yclk, a);
1819         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1820         a.full = dfixed_const(10);
1821         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1822         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1823         bandwidth.full = dfixed_mul(dram_channels, yclk);
1824         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1825
1826         return dfixed_trunc(bandwidth);
1827 }
1828
1829 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1830 {
1831         /* Calculate the display Data return Bandwidth */
1832         fixed20_12 return_efficiency; /* 0.8 */
1833         fixed20_12 sclk, bandwidth;
1834         fixed20_12 a;
1835
1836         a.full = dfixed_const(1000);
1837         sclk.full = dfixed_const(wm->sclk);
1838         sclk.full = dfixed_div(sclk, a);
1839         a.full = dfixed_const(10);
1840         return_efficiency.full = dfixed_const(8);
1841         return_efficiency.full = dfixed_div(return_efficiency, a);
1842         a.full = dfixed_const(32);
1843         bandwidth.full = dfixed_mul(a, sclk);
1844         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1845
1846         return dfixed_trunc(bandwidth);
1847 }
1848
1849 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1850 {
1851         return 32;
1852 }
1853
1854 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1855 {
1856         /* Calculate the DMIF Request Bandwidth */
1857         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1858         fixed20_12 disp_clk, sclk, bandwidth;
1859         fixed20_12 a, b1, b2;
1860         u32 min_bandwidth;
1861
1862         a.full = dfixed_const(1000);
1863         disp_clk.full = dfixed_const(wm->disp_clk);
1864         disp_clk.full = dfixed_div(disp_clk, a);
1865         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1866         b1.full = dfixed_mul(a, disp_clk);
1867
1868         a.full = dfixed_const(1000);
1869         sclk.full = dfixed_const(wm->sclk);
1870         sclk.full = dfixed_div(sclk, a);
1871         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1872         b2.full = dfixed_mul(a, sclk);
1873
1874         a.full = dfixed_const(10);
1875         disp_clk_request_efficiency.full = dfixed_const(8);
1876         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1877
1878         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1879
1880         a.full = dfixed_const(min_bandwidth);
1881         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1882
1883         return dfixed_trunc(bandwidth);
1884 }
1885
1886 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1887 {
1888         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1889         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1890         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1891         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1892
1893         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1894 }
1895
1896 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1897 {
1898         /* Calculate the display mode Average Bandwidth
1899          * DisplayMode should contain the source and destination dimensions,
1900          * timing, etc.
1901          */
1902         fixed20_12 bpp;
1903         fixed20_12 line_time;
1904         fixed20_12 src_width;
1905         fixed20_12 bandwidth;
1906         fixed20_12 a;
1907
1908         a.full = dfixed_const(1000);
1909         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1910         line_time.full = dfixed_div(line_time, a);
1911         bpp.full = dfixed_const(wm->bytes_per_pixel);
1912         src_width.full = dfixed_const(wm->src_width);
1913         bandwidth.full = dfixed_mul(src_width, bpp);
1914         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1915         bandwidth.full = dfixed_div(bandwidth, line_time);
1916
1917         return dfixed_trunc(bandwidth);
1918 }
1919
/* Compute the latency watermark for one head: the worst-case latency in
 * ns before display data arrives, extended by any shortfall in line
 * buffer fill time versus active display time.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time to return one worst-case 512-byte x8 chunk at the available rate */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time spent servicing the other active heads before this one */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vtaps or interlace need 4 source lines
	 * per destination line; otherwise 2 suffice */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = DMIF-size-limited fill rate: dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = pixel-rate-limited fill: (disp_clk / 1000) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the tightest of the three limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the needed source lines at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the lb cannot be refilled within the active time, pad the
	 * watermark by the shortfall */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1982
1983 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1984 {
1985         if (dce6_average_bandwidth(wm) <=
1986             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1987                 return true;
1988         else
1989                 return false;
1990 };
1991
1992 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1993 {
1994         if (dce6_average_bandwidth(wm) <=
1995             (dce6_available_bandwidth(wm) / wm->num_heads))
1996                 return true;
1997         else
1998                 return false;
1999 };
2000
2001 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2002 {
2003         u32 lb_partitions = wm->lb_size / wm->src_width;
2004         u32 line_time = wm->active_time + wm->blank_time;
2005         u32 latency_tolerant_lines;
2006         u32 latency_hiding;
2007         fixed20_12 a;
2008
2009         a.full = dfixed_const(1);
2010         if (wm->vsc.full > a.full)
2011                 latency_tolerant_lines = 1;
2012         else {
2013                 if (lb_partitions <= (wm->vtaps + 1))
2014                         latency_tolerant_lines = 1;
2015                 else
2016                         latency_tolerant_lines = 2;
2017         }
2018
2019         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2020
2021         if (dce6_latency_watermark(wm) <= latency_hiding)
2022                 return true;
2023         else
2024                 return false;
2025 }
2026
/* Program the latency watermarks (wm A for high clocks, wm B for low
 * clocks) and the arbitration priority marks for one crtc.
 * @lb_size: line buffer bytes allocated to this pipe
 * @num_heads: number of active crtcs
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, capped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a (ns) * pixel clock * hsc,
		 * converted to units of 16 pixels */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same conversion for the low-clock watermark */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2186
2187 void dce6_bandwidth_update(struct radeon_device *rdev)
2188 {
2189         struct drm_display_mode *mode0 = NULL;
2190         struct drm_display_mode *mode1 = NULL;
2191         u32 num_heads = 0, lb_size;
2192         int i;
2193
2194         radeon_update_display_priority(rdev);
2195
2196         for (i = 0; i < rdev->num_crtc; i++) {
2197                 if (rdev->mode_info.crtcs[i]->base.enabled)
2198                         num_heads++;
2199         }
2200         for (i = 0; i < rdev->num_crtc; i += 2) {
2201                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2202                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2203                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2204                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2205                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2206                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2207         }
2208 }
2209
2210 /*
2211  * Core functions
2212  */
2213 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2214 {
2215         const u32 num_tile_mode_states = 32;
2216         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2217
2218         switch (rdev->config.si.mem_row_size_in_kb) {
2219         case 1:
2220                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2221                 break;
2222         case 2:
2223         default:
2224                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2225                 break;
2226         case 4:
2227                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2228                 break;
2229         }
2230
2231         if ((rdev->family == CHIP_TAHITI) ||
2232             (rdev->family == CHIP_PITCAIRN)) {
2233                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2234                         switch (reg_offset) {
2235                         case 0:  /* non-AA compressed depth or any compressed stencil */
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2240                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2241                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2244                                 break;
2245                         case 1:  /* 2xAA/4xAA compressed depth only */
2246                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2248                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2249                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2250                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2251                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2254                                 break;
2255                         case 2:  /* 8xAA compressed depth only */
2256                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2258                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2259                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2260                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2261                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2263                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2264                                 break;
2265                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2266                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2268                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2269                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2270                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2271                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2274                                 break;
2275                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2276                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2277                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2278                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2279                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2280                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2281                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2283                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2284                                 break;
2285                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2286                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2288                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2289                                                  TILE_SPLIT(split_equal_to_row_size) |
2290                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2291                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2294                                 break;
2295                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2296                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2298                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2299                                                  TILE_SPLIT(split_equal_to_row_size) |
2300                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2301                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2304                                 break;
2305                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2309                                                  TILE_SPLIT(split_equal_to_row_size) |
2310                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2311                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2313                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2314                                 break;
2315                         case 8:  /* 1D and 1D Array Surfaces */
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2317                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2319                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2320                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2321                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2323                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2324                                 break;
2325                         case 9:  /* Displayable maps. */
2326                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2328                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2329                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2330                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2331                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2333                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2334                                 break;
2335                         case 10:  /* Display 8bpp. */
2336                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2338                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2339                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2340                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2341                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2342                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2344                                 break;
2345                         case 11:  /* Display 16bpp. */
2346                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2348                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2349                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2350                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2351                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2353                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2354                                 break;
2355                         case 12:  /* Display 32bpp. */
2356                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2359                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2360                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2361                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2364                                 break;
2365                         case 13:  /* Thin. */
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2368                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2369                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2370                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2371                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2374                                 break;
2375                         case 14:  /* Thin 8 bpp. */
2376                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2377                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2378                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2379                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2380                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2381                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2382                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2384                                 break;
2385                         case 15:  /* Thin 16 bpp. */
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2389                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2390                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2391                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2394                                 break;
2395                         case 16:  /* Thin 32 bpp. */
2396                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2397                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2398                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2399                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2401                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2403                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2404                                 break;
2405                         case 17:  /* Thin 64 bpp. */
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2409                                                  TILE_SPLIT(split_equal_to_row_size) |
2410                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2411                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2413                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2414                                 break;
2415                         case 21:  /* 8 bpp PRT. */
2416                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2418                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2419                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2420                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2421                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2424                                 break;
2425                         case 22:  /* 16 bpp PRT */
2426                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2428                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2429                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2430                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2431                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2434                                 break;
2435                         case 23:  /* 32 bpp PRT */
2436                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2438                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2439                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2440                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2441                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2443                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2444                                 break;
2445                         case 24:  /* 64 bpp PRT */
2446                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2448                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2449                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2450                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2451                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2453                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2454                                 break;
2455                         case 25:  /* 128 bpp PRT */
2456                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2458                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2459                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2460                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2461                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2464                                 break;
2465                         default:
2466                                 gb_tile_moden = 0;
2467                                 break;
2468                         }
2469                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2470                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2471                 }
2472         } else if ((rdev->family == CHIP_VERDE) ||
2473                    (rdev->family == CHIP_OLAND) ||
2474                    (rdev->family == CHIP_HAINAN)) {
2475                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2476                         switch (reg_offset) {
2477                         case 0:  /* non-AA compressed depth or any compressed stencil */
2478                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2479                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2480                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2481                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2483                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2485                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2486                                 break;
2487                         case 1:  /* 2xAA/4xAA compressed depth only */
2488                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2490                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2491                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2492                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2493                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2495                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2496                                 break;
2497                         case 2:  /* 8xAA compressed depth only */
2498                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2500                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2502                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2503                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2506                                 break;
2507                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2508                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2510                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2511                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2512                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2513                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2516                                 break;
2517                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2518                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2519                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2520                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2521                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2522                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2523                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2525                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2526                                 break;
2527                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2528                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2531                                                  TILE_SPLIT(split_equal_to_row_size) |
2532                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2533                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2535                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2536                                 break;
2537                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2538                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2541                                                  TILE_SPLIT(split_equal_to_row_size) |
2542                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2543                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2545                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2546                                 break;
2547                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2548                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2550                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2551                                                  TILE_SPLIT(split_equal_to_row_size) |
2552                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2553                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2556                                 break;
2557                         case 8:  /* 1D and 1D Array Surfaces */
2558                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2559                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2560                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2561                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2562                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2563                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2565                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2566                                 break;
2567                         case 9:  /* Displayable maps. */
2568                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2571                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2572                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2573                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2576                                 break;
2577                         case 10:  /* Display 8bpp. */
2578                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2580                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2581                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2582                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2583                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2586                                 break;
2587                         case 11:  /* Display 16bpp. */
2588                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2590                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2592                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2593                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2595                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2596                                 break;
2597                         case 12:  /* Display 32bpp. */
2598                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2599                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2600                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2602                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2603                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2606                                 break;
2607                         case 13:  /* Thin. */
2608                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2609                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2610                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2611                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2612                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2613                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2615                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2616                                 break;
2617                         case 14:  /* Thin 8 bpp. */
2618                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2620                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2622                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2623                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2626                                 break;
2627                         case 15:  /* Thin 16 bpp. */
2628                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2630                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2631                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2632                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2633                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2635                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2636                                 break;
2637                         case 16:  /* Thin 32 bpp. */
2638                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2640                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2641                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2642                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2643                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2646                                 break;
2647                         case 17:  /* Thin 64 bpp. */
2648                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2650                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2651                                                  TILE_SPLIT(split_equal_to_row_size) |
2652                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2653                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2656                                 break;
2657                         case 21:  /* 8 bpp PRT. */
2658                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2660                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2661                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2662                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2663                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2664                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2665                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2666                                 break;
2667                         case 22:  /* 16 bpp PRT */
2668                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2673                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2676                                 break;
2677                         case 23:  /* 32 bpp PRT */
2678                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2680                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2681                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2682                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2683                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2685                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2686                                 break;
2687                         case 24:  /* 64 bpp PRT */
2688                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2690                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2691                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2692                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2693                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2694                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2695                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2696                                 break;
2697                         case 25:  /* 128 bpp PRT */
2698                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2700                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2701                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2702                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2703                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2706                                 break;
2707                         default:
2708                                 gb_tile_moden = 0;
2709                                 break;
2710                         }
2711                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2712                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2713                 }
2714         } else
2715                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2716 }
2717
2718 static void si_select_se_sh(struct radeon_device *rdev,
2719                             u32 se_num, u32 sh_num)
2720 {
2721         u32 data = INSTANCE_BROADCAST_WRITES;
2722
2723         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2724                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2725         else if (se_num == 0xffffffff)
2726                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2727         else if (sh_num == 0xffffffff)
2728                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2729         else
2730                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2731         WREG32(GRBM_GFX_INDEX, data);
2732 }
2733
2734 static u32 si_create_bitmask(u32 bit_width)
2735 {
2736         u32 i, mask = 0;
2737
2738         for (i = 0; i < bit_width; i++) {
2739                 mask <<= 1;
2740                 mask |= 1;
2741         }
2742         return mask;
2743 }
2744
2745 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2746 {
2747         u32 data, mask;
2748
2749         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2750         if (data & 1)
2751                 data &= INACTIVE_CUS_MASK;
2752         else
2753                 data = 0;
2754         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2755
2756         data >>= INACTIVE_CUS_SHIFT;
2757
2758         mask = si_create_bitmask(cu_per_sh);
2759
2760         return ~data & mask;
2761 }
2762
2763 static void si_setup_spi(struct radeon_device *rdev,
2764                          u32 se_num, u32 sh_per_se,
2765                          u32 cu_per_sh)
2766 {
2767         int i, j, k;
2768         u32 data, mask, active_cu;
2769
2770         for (i = 0; i < se_num; i++) {
2771                 for (j = 0; j < sh_per_se; j++) {
2772                         si_select_se_sh(rdev, i, j);
2773                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2774                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2775
2776                         mask = 1;
2777                         for (k = 0; k < 16; k++) {
2778                                 mask <<= k;
2779                                 if (active_cu & mask) {
2780                                         data &= ~mask;
2781                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2782                                         break;
2783                                 }
2784                         }
2785                 }
2786         }
2787         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2788 }
2789
2790 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2791                               u32 max_rb_num, u32 se_num,
2792                               u32 sh_per_se)
2793 {
2794         u32 data, mask;
2795
2796         data = RREG32(CC_RB_BACKEND_DISABLE);
2797         if (data & 1)
2798                 data &= BACKEND_DISABLE_MASK;
2799         else
2800                 data = 0;
2801         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2802
2803         data >>= BACKEND_DISABLE_SHIFT;
2804
2805         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2806
2807         return data & mask;
2808 }
2809
/* Build a global picture of which render backends are usable and
 * program PA_SC_RASTER_CONFIG for each shader engine accordingly.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Collect the per-SE/SH disable bits into one bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert into an enabled-RB bitmap: set bit == usable RB. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Pick a raster config per SE, consuming two enabled_rbs bits
	 * (one RB pair) per shader array. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of the pair is alive */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of the pair is alive */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both RBs alive (also the fallback) */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast addressing */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2857
/* One-time GFX block bring-up: record the per-ASIC shader/backend
 * configuration, derive the tiling setup from the memory controller
 * config, program the address-config registers, and set 3D engine
 * defaults.  Register writes are order-sensitive; do not reorder.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader engine / pipe / cache topology. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:	/* unknown SI parts fall back to the Verde config */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* Derive the DRAM row size from the MC column count, capped at 4KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the address config to every block that needs it. */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no change: leaves hw default in place */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no change: leaves hw default in place */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the writes settle before continuing bring-up */
	udelay(50);
}
3114
3115 /*
3116  * GPU scratch registers helpers function.
3117  */
3118 static void si_scratch_init(struct radeon_device *rdev)
3119 {
3120         int i;
3121
3122         rdev->scratch.num_reg = 7;
3123         rdev->scratch.reg_base = SCRATCH_REG0;
3124         for (i = 0; i < rdev->scratch.num_reg; i++) {
3125                 rdev->scratch.free[i] = true;
3126                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3127         }
3128 }
3129
/* Emit a fence on the given CP ring: a GART-coherency flush followed by
 * an EVENT_WRITE_EOP that writes the fence sequence number to the fence
 * driver's GPU address and raises an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* CP_COHER_SIZE: whole range */
	radeon_ring_write(ring, 0);		/* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);	/* fence addr, low 32 bits */
	/* high addr bits + DATA_SEL(1)=write 32-bit seq, INT_SEL(2)=irq on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3156
3157 /*
3158  * IB stuff
3159  */
/* Schedule an indirect buffer on a CP ring.  Const IBs are preceded by
 * a SWITCH_BUFFER; normal IBs optionally record the post-IB read
 * pointer (to a scratch reg or writeback slot) and are followed by a
 * GART-coherency flush for their VM id.  The hard-coded 3/4/5/8 word
 * counts in the next_rptr math must match the packets emitted below.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 words below + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 words below + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));	/* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap the IB on big-endian hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);	/* CP_COHER_SIZE: whole range */
		radeon_ring_write(ring, 0);		/* CP_COHER_BASE */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3216
3217 /*
3218  * CP.
3219  */
3220 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3221 {
3222         if (enable)
3223                 WREG32(CP_ME_CNTL, 0);
3224         else {
3225                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3226                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3227                 WREG32(SCRATCH_UMSK, 0);
3228                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3229                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3230                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3231         }
3232         udelay(50);
3233 }
3234
3235 static int si_cp_load_microcode(struct radeon_device *rdev)
3236 {
3237         const __be32 *fw_data;
3238         int i;
3239
3240         if (!rdev->me_fw || !rdev->pfp_fw)
3241                 return -EINVAL;
3242
3243         si_cp_enable(rdev, false);
3244
3245         /* PFP */
3246         fw_data = (const __be32 *)rdev->pfp_fw->data;
3247         WREG32(CP_PFP_UCODE_ADDR, 0);
3248         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3249                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3250         WREG32(CP_PFP_UCODE_ADDR, 0);
3251
3252         /* CE */
3253         fw_data = (const __be32 *)rdev->ce_fw->data;
3254         WREG32(CP_CE_UCODE_ADDR, 0);
3255         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3256                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3257         WREG32(CP_CE_UCODE_ADDR, 0);
3258
3259         /* ME */
3260         fw_data = (const __be32 *)rdev->me_fw->data;
3261         WREG32(CP_ME_RAM_WADDR, 0);
3262         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3263                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3264         WREG32(CP_ME_RAM_WADDR, 0);
3265
3266         WREG32(CP_PFP_UCODE_ADDR, 0);
3267         WREG32(CP_CE_UCODE_ADDR, 0);
3268         WREG32(CP_ME_RAM_WADDR, 0);
3269         WREG32(CP_ME_RAM_RADDR, 0);
3270         return 0;
3271 }
3272
3273 static int si_cp_start(struct radeon_device *rdev)
3274 {
3275         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3276         int r, i;
3277
3278         r = radeon_ring_lock(rdev, ring, 7 + 4);
3279         if (r) {
3280                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3281                 return r;
3282         }
3283         /* init the CP */
3284         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3285         radeon_ring_write(ring, 0x1);
3286         radeon_ring_write(ring, 0x0);
3287         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3288         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3289         radeon_ring_write(ring, 0);
3290         radeon_ring_write(ring, 0);
3291
3292         /* init the CE partitions */
3293         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3294         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3295         radeon_ring_write(ring, 0xc000);
3296         radeon_ring_write(ring, 0xe000);
3297         radeon_ring_unlock_commit(rdev, ring);
3298
3299         si_cp_enable(rdev, true);
3300
3301         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3302         if (r) {
3303                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3304                 return r;
3305         }
3306
3307         /* setup clear context state */
3308         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3309         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3310
3311         for (i = 0; i < si_default_size; i++)
3312                 radeon_ring_write(ring, si_default_state[i]);
3313
3314         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3315         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3316
3317         /* set clear context state */
3318         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3319         radeon_ring_write(ring, 0);
3320
3321         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3322         radeon_ring_write(ring, 0x00000316);
3323         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3324         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3325
3326         radeon_ring_unlock_commit(rdev, ring);
3327
3328         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3329                 ring = &rdev->ring[i];
3330                 r = radeon_ring_lock(rdev, ring, 2);
3331
3332                 /* clear the compute context state */
3333                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3334                 radeon_ring_write(ring, 0);
3335
3336                 radeon_ring_unlock_commit(rdev, ring);
3337         }
3338
3339         return 0;
3340 }
3341
3342 static void si_cp_fini(struct radeon_device *rdev)
3343 {
3344         struct radeon_ring *ring;
3345         si_cp_enable(rdev, false);
3346
3347         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3348         radeon_ring_fini(rdev, ring);
3349         radeon_scratch_free(rdev, ring->rptr_save_reg);
3350
3351         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3352         radeon_ring_fini(rdev, ring);
3353         radeon_scratch_free(rdev, ring->rptr_save_reg);
3354
3355         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3356         radeon_ring_fini(rdev, ring);
3357         radeon_scratch_free(rdev, ring->rptr_save_reg);
3358 }
3359
/**
 * si_cp_resume - soft-reset the CP and (re)program all three CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Pulses a soft reset of the CP (together with PA/VGT/SPI/SX, which
 * must be reset along with it), then programs ring buffer 0 (gfx) and
 * ring buffers 1/2 (compute): buffer size, read/write pointers,
 * writeback (rptr) address and ring base.  Finally starts the rings
 * via si_cp_start() and ring-tests each one.
 *
 * Returns 0 on success or the gfx ring-test error.  A compute
 * ring-test failure only marks that particular ring as not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	/* scratch register shadow lives in the writeback buffer */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* NOTE(review): RB_NO_UPDATE is only OR'd into ring0's CNTL
		 * value; rings 1/2 below recompute tmp without it — confirm
		 * this asymmetry is intended. */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);	/* drop RB_RPTR_WR_ENA again */

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx failure is fatal: mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3495
/**
 * si_gpu_check_soft_reset - determine which GPU blocks need a soft reset
 *
 * @rdev: radeon_device pointer
 *
 * Samples the GRBM/SRBM/DMA/VM_L2 status registers and builds a
 * RADEON_RESET_* bitmask of engines that appear busy or hung.
 * A "busy" MC is deliberately dropped from the mask at the end,
 * since it is most likely just servicing traffic rather than hung.
 *
 * Returns the reset mask (0 when everything looks idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3576
/**
 * si_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask (typically from
 *              si_gpu_check_soft_reset()); no-op when 0
 *
 * Sequence: dump fault status, halt the CP (ME/PFP/CE), disable any
 * DMA ring being reset, stop the MC and wait for it to idle, then
 * translate @reset_mask into GRBM/SRBM soft-reset bits and pulse
 * them (set, delay, clear).  Finally resumes the MC and re-dumps
 * the status registers.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* stop memory traffic before resetting anything */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the RADEON_RESET_* mask onto GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, clear
	 * (reads after writes post/flush the register access) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse pattern for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3701
3702 int si_asic_reset(struct radeon_device *rdev)
3703 {
3704         u32 reset_mask;
3705
3706         reset_mask = si_gpu_check_soft_reset(rdev);
3707
3708         if (reset_mask)
3709                 r600_set_bios_scratch_engine_hung(rdev, true);
3710
3711         si_gpu_soft_reset(rdev, reset_mask);
3712
3713         reset_mask = si_gpu_check_soft_reset(rdev);
3714
3715         if (!reset_mask)
3716                 r600_set_bios_scratch_engine_hung(rdev, false);
3717
3718         return 0;
3719 }
3720
3721 /**
3722  * si_gfx_is_lockup - Check if the GFX engine is locked up
3723  *
3724  * @rdev: radeon_device pointer
3725  * @ring: radeon_ring structure holding ring information
3726  *
3727  * Check if the GFX engine is locked up.
3728  * Returns true if the engine appears to be locked up, false if not.
3729  */
3730 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3731 {
3732         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3733
3734         if (!(reset_mask & (RADEON_RESET_GFX |
3735                             RADEON_RESET_COMPUTE |
3736                             RADEON_RESET_CP))) {
3737                 radeon_ring_lockup_update(ring);
3738                 return false;
3739         }
3740         /* force CP activities */
3741         radeon_ring_force_activity(rdev, ring);
3742         return radeon_ring_test_lockup(rdev, ring);
3743 }
3744
3745 /**
3746  * si_dma_is_lockup - Check if the DMA engine is locked up
3747  *
3748  * @rdev: radeon_device pointer
3749  * @ring: radeon_ring structure holding ring information
3750  *
3751  * Check if the async DMA engine is locked up.
3752  * Returns true if the engine appears to be locked up, false if not.
3753  */
3754 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3755 {
3756         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3757         u32 mask;
3758
3759         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3760                 mask = RADEON_RESET_DMA;
3761         else
3762                 mask = RADEON_RESET_DMA1;
3763
3764         if (!(reset_mask & mask)) {
3765                 radeon_ring_lockup_update(ring);
3766                 return false;
3767         }
3768         /* force ring activities */
3769         radeon_ring_force_activity(rdev, ring);
3770         return radeon_ring_test_lockup(rdev, ring);
3771 }
3772
3773 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Zeroes the HDP tiling/surface registers, stops the MC (display
 * requests included), then programs the system aperture, FB
 * location, HDP non-surface range and disables the AGP aperture,
 * before resuming the MC and shutting off the VGA renderer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 stride 0x18 — presumably the
	 * HDP surface registers; offsets not named in sid.h here. */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* MC must be idle while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION: top 16 bits = end, bottom 16 = start (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable AGP aperture (top below bottom) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3824
3825 void si_vram_gtt_location(struct radeon_device *rdev,
3826                           struct radeon_mc *mc)
3827 {
3828         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3829                 /* leave room for at least 1024M GTT */
3830                 dev_warn(rdev->dev, "limiting VRAM\n");
3831                 mc->real_vram_size = 0xFFC0000000ULL;
3832                 mc->mc_vram_size = 0xFFC0000000ULL;
3833         }
3834         radeon_vram_location(rdev, &rdev->mc, 0);
3835         rdev->mc.gtt_base_align = 0;
3836         radeon_gtt_location(rdev, mc);
3837 }
3838
3839 static int si_mc_init(struct radeon_device *rdev)
3840 {
3841         u32 tmp;
3842         int chansize, numchan;
3843
3844         /* Get VRAM informations */
3845         rdev->mc.vram_is_ddr = true;
3846         tmp = RREG32(MC_ARB_RAMCFG);
3847         if (tmp & CHANSIZE_OVERRIDE) {
3848                 chansize = 16;
3849         } else if (tmp & CHANSIZE_MASK) {
3850                 chansize = 64;
3851         } else {
3852                 chansize = 32;
3853         }
3854         tmp = RREG32(MC_SHARED_CHMAP);
3855         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3856         case 0:
3857         default:
3858                 numchan = 1;
3859                 break;
3860         case 1:
3861                 numchan = 2;
3862                 break;
3863         case 2:
3864                 numchan = 4;
3865                 break;
3866         case 3:
3867                 numchan = 8;
3868                 break;
3869         case 4:
3870                 numchan = 3;
3871                 break;
3872         case 5:
3873                 numchan = 6;
3874                 break;
3875         case 6:
3876                 numchan = 10;
3877                 break;
3878         case 7:
3879                 numchan = 12;
3880                 break;
3881         case 8:
3882                 numchan = 16;
3883                 break;
3884         }
3885         rdev->mc.vram_width = numchan * chansize;
3886         /* Could aper size report 0 ? */
3887         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3888         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3889         /* size in MB on si */
3890         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3891         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3892         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3893         si_vram_gtt_location(rdev, &rdev->mc);
3894         radeon_update_bandwidth_info(rdev);
3895
3896         return 0;
3897 }
3898
3899 /*
3900  * GART
3901  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and VM context 0 TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP read/write cache so host writes to the page table
 * are visible, then invalidates the VM TLB for context 0.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3910
/**
 * si_pcie_gart_enable - bring up the PCIE GART (VM context 0) and VM 1-15
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, configures the L1 TLB and L2
 * cache, programs context 0 to cover the GTT aperture with the GART
 * table, points contexts 1-15 at the same table as a placeholder
 * (their real page tables are assigned on the fly by the VM code in
 * radeon_gart.c), enables fault reporting for them, and flushes the
 * TLB.
 *
 * Returns 0 on success, -EINVAL if no GART table object exists, or
 * the error from pinning the table.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: maps the GTT range through the GART table,
	 * faulting accesses redirect to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): 0x15D4-0x15DC cleared here — registers not
	 * named in sid.h in this chunk; purpose unverified. */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3995
/**
 * si_pcie_gart_disable - tear down the PCIE GART address translation
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, drops the L1 TLB and L2 cache enable
 * bits (keeping pass-through for unmapped system-aperture accesses),
 * and unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4014
/**
 * si_pcie_gart_fini - disable the GART and free its resources
 *
 * @rdev: radeon_device pointer
 *
 * Disables translation, frees the page table VRAM object and tears
 * down the GART bookkeeping.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4021
4022 /* vm parser */
/**
 * si_vm_reg_valid - check whether a VM command stream may touch a register
 *
 * @reg: register byte offset from the command stream
 *
 * All context registers (offset >= 0x28000) are allowed; otherwise
 * the register must be on the explicit allowlist of config registers
 * below.  Anything else is rejected with a DRM_ERROR.
 *
 * Returns true if the register access is permitted.
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4063
/**
 * si_vm_packet3_ce_check - validate a PACKET3 destined for the CE ring
 *
 * @rdev: radeon_device pointer (unused here)
 * @ib: indirect buffer contents (unused here)
 * @pkt: parsed packet header
 *
 * The constant engine may only execute packets from a fixed
 * allowlist; the packet payload itself is not inspected.
 *
 * Returns 0 if the opcode is permitted, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4085
/**
 * si_vm_packet3_gfx_check - validate a type-3 packet from a VM gfx IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB dwords
 * @pkt: decoded packet (idx, opcode and count already filled in)
 *
 * Userspace IBs executed on the gfx ring run with VM protection, but a
 * few PACKET3 opcodes can still write MMIO registers.  For those, every
 * destination register is checked with si_vm_reg_valid() so userspace
 * cannot touch privileged registers.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	/* opcodes with no arbitrary-register side effects - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* NOTE(review): 0xf00 is presumably the dst-sel field and 0
		 * means "mem-mapped register" - confirm against the PM4 spec */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword offset -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register mode: all data dwords go to
				 * one register, so only check start_reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* pkt->count - 2 data dwords, one consecutive
				 * register per dword */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* only check when the write space is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* only check when the destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the whole run against the SET_CONFIG_REG
		 * window, then validate each register individually */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* no auto-increment: single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* NOTE(review): 0x1fffff appears to be
					 * the CP_DMA count field - confirm the
					 * units (bytes vs dwords) vs PM4 spec */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					/* no auto-increment: single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4244
4245 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4246                                        u32 *ib, struct radeon_cs_packet *pkt)
4247 {
4248         u32 idx = pkt->idx + 1;
4249         u32 idx_value = ib[idx];
4250         u32 start_reg, reg, i;
4251
4252         switch (pkt->opcode) {
4253         case PACKET3_NOP:
4254         case PACKET3_SET_BASE:
4255         case PACKET3_CLEAR_STATE:
4256         case PACKET3_DISPATCH_DIRECT:
4257         case PACKET3_DISPATCH_INDIRECT:
4258         case PACKET3_ALLOC_GDS:
4259         case PACKET3_WRITE_GDS_RAM:
4260         case PACKET3_ATOMIC_GDS:
4261         case PACKET3_ATOMIC:
4262         case PACKET3_OCCLUSION_QUERY:
4263         case PACKET3_SET_PREDICATION:
4264         case PACKET3_COND_EXEC:
4265         case PACKET3_PRED_EXEC:
4266         case PACKET3_CONTEXT_CONTROL:
4267         case PACKET3_STRMOUT_BUFFER_UPDATE:
4268         case PACKET3_WAIT_REG_MEM:
4269         case PACKET3_MEM_WRITE:
4270         case PACKET3_PFP_SYNC_ME:
4271         case PACKET3_SURFACE_SYNC:
4272         case PACKET3_EVENT_WRITE:
4273         case PACKET3_EVENT_WRITE_EOP:
4274         case PACKET3_EVENT_WRITE_EOS:
4275         case PACKET3_SET_CONTEXT_REG:
4276         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4277         case PACKET3_SET_SH_REG:
4278         case PACKET3_SET_SH_REG_OFFSET:
4279         case PACKET3_INCREMENT_DE_COUNTER:
4280         case PACKET3_WAIT_ON_CE_COUNTER:
4281         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4282         case PACKET3_ME_WRITE:
4283                 break;
4284         case PACKET3_COPY_DATA:
4285                 if ((idx_value & 0xf00) == 0) {
4286                         reg = ib[idx + 3] * 4;
4287                         if (!si_vm_reg_valid(reg))
4288                                 return -EINVAL;
4289                 }
4290                 break;
4291         case PACKET3_WRITE_DATA:
4292                 if ((idx_value & 0xf00) == 0) {
4293                         start_reg = ib[idx + 1] * 4;
4294                         if (idx_value & 0x10000) {
4295                                 if (!si_vm_reg_valid(start_reg))
4296                                         return -EINVAL;
4297                         } else {
4298                                 for (i = 0; i < (pkt->count - 2); i++) {
4299                                         reg = start_reg + (4 * i);
4300                                         if (!si_vm_reg_valid(reg))
4301                                                 return -EINVAL;
4302                                 }
4303                         }
4304                 }
4305                 break;
4306         case PACKET3_COND_WRITE:
4307                 if (idx_value & 0x100) {
4308                         reg = ib[idx + 5] * 4;
4309                         if (!si_vm_reg_valid(reg))
4310                                 return -EINVAL;
4311                 }
4312                 break;
4313         case PACKET3_COPY_DW:
4314                 if (idx_value & 0x2) {
4315                         reg = ib[idx + 3] * 4;
4316                         if (!si_vm_reg_valid(reg))
4317                                 return -EINVAL;
4318                 }
4319                 break;
4320         default:
4321                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4322                 return -EINVAL;
4323         }
4324         return 0;
4325 }
4326
4327 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4328 {
4329         int ret = 0;
4330         u32 idx = 0;
4331         struct radeon_cs_packet pkt;
4332
4333         do {
4334                 pkt.idx = idx;
4335                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4336                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4337                 pkt.one_reg_wr = 0;
4338                 switch (pkt.type) {
4339                 case RADEON_PACKET_TYPE0:
4340                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4341                         ret = -EINVAL;
4342                         break;
4343                 case RADEON_PACKET_TYPE2:
4344                         idx += 1;
4345                         break;
4346                 case RADEON_PACKET_TYPE3:
4347                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4348                         if (ib->is_const_ib)
4349                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4350                         else {
4351                                 switch (ib->ring) {
4352                                 case RADEON_RING_TYPE_GFX_INDEX:
4353                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4354                                         break;
4355                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4356                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4357                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4358                                         break;
4359                                 default:
4360                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4361                                         ret = -EINVAL;
4362                                         break;
4363                                 }
4364                         }
4365                         idx += pkt.count + 2;
4366                         break;
4367                 default:
4368                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4369                         ret = -EINVAL;
4370                         break;
4371                 }
4372                 if (ret)
4373                         break;
4374         } while (idx < ib->length_dw);
4375
4376         return ret;
4377 }
4378
4379 /*
4380  * vm
4381  */
4382 int si_vm_init(struct radeon_device *rdev)
4383 {
4384         /* number of VMs */
4385         rdev->vm_manager.nvm = 16;
4386         /* base offset of vram pages */
4387         rdev->vm_manager.vram_base_offset = 0;
4388
4389         return 0;
4390 }
4391
/* Nothing to tear down: si_vm_init() allocates no resources */
void si_vm_fini(struct radeon_device *rdev)
{
}
4395
/**
 * si_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 *
 * Print human readable fault information (SI).  The memory-client id is
 * translated into a block name; Tahiti uses a different mc_id mapping
 * than the other SI parts, hence the two lookup tables below.
 */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	if (rdev->family == CHIP_TAHITI) {
		/* Tahiti mc_id -> block mapping */
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* mc_id -> block mapping for the remaining SI parts */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* addr is a page number (register already holds the address >> 12) */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4659
/**
 * si_vm_set_page - update the page tables using the CP
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (SI).  The engine is chosen from
 * rdev->asic->vm.pt_ring_index: the gfx ring builds WRITE_DATA packets,
 * any other ring builds DMA write / PTE-PDE packets.
 */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		while (count) {
			/* 2 header dwords + 2 dwords per PTE, capped at the
			 * maximum WRITE_DATA payload (0x3FFE dwords) */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: look up the real
					 * address through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				/* 2 dwords per PTE, capped at the max DMA
				 * write payload (0xFFFFE dwords) */
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				/* the engine generates ndw/2 PTEs itself */
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* pad IB to a multiple of 8 dwords
		 * (NOTE(review): presumably a DMA engine alignment
		 * requirement - confirm) */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
	}
}
4765
/**
 * si_vm_flush - flush the VM TLB via the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Emits WRITE_DATA packets that program the VM's page-directory base,
 * flush the HDP cache and request a TLB invalidate for this VM id,
 * followed by a PFP/ME sync (SI).
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4808
/**
 * si_dma_vm_flush - flush the VM TLB via a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Same sequence as si_vm_flush() but built from DMA SRBM_WRITE packets:
 * program the page-directory base, flush the HDP cache, invalidate the
 * TLB for this VM id.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7
	 * (NOTE(review): comment looks stale - nvm is 16 and vm->id can
	 * exceed 7, matching the gfx path's "contexts0-15"; confirm) */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4834
4835 /*
4836  *  Power and clock gating
4837  */
4838 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4839 {
4840         int i;
4841
4842         for (i = 0; i < rdev->usec_timeout; i++) {
4843                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4844                         break;
4845                 udelay(1);
4846         }
4847
4848         for (i = 0; i < rdev->usec_timeout; i++) {
4849                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4850                         break;
4851                 udelay(1);
4852         }
4853 }
4854
4855 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4856                                          bool enable)
4857 {
4858         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4859         u32 mask;
4860         int i;
4861
4862         if (enable)
4863                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4864         else
4865                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4866         WREG32(CP_INT_CNTL_RING0, tmp);
4867
4868         if (!enable) {
4869                 /* read a gfx register */
4870                 tmp = RREG32(DB_DEPTH_INFO);
4871
4872                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4873                 for (i = 0; i < rdev->usec_timeout; i++) {
4874                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4875                                 break;
4876                         udelay(1);
4877                 }
4878         }
4879 }
4880
4881 static void si_set_uvd_dcm(struct radeon_device *rdev,
4882                            bool sw_mode)
4883 {
4884         u32 tmp, tmp2;
4885
4886         tmp = RREG32(UVD_CGC_CTRL);
4887         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4888         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4889
4890         if (sw_mode) {
4891                 tmp &= ~0x7ffff800;
4892                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4893         } else {
4894                 tmp |= 0x7ffff800;
4895                 tmp2 = 0;
4896         }
4897
4898         WREG32(UVD_CGC_CTRL, tmp);
4899         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4900 }
4901
/* Initialize UVD internal clock gating.
 *
 * hw_mode is hard-coded to true, so only the hardware-DCM path runs;
 * the else branch that clears DCM is currently dead code, presumably
 * kept so the mode can be flipped easily during bring-up.
 */
static void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		/* hardware-controlled dynamic clocking */
		si_set_uvd_dcm(rdev, false);
	} else {
		/* unreachable while hw_mode == true: force DCM off */
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4914
4915 static u32 si_halt_rlc(struct radeon_device *rdev)
4916 {
4917         u32 data, orig;
4918
4919         orig = data = RREG32(RLC_CNTL);
4920
4921         if (data & RLC_ENABLE) {
4922                 data &= ~RLC_ENABLE;
4923                 WREG32(RLC_CNTL, data);
4924
4925                 si_wait_for_rlc_serdes(rdev);
4926         }
4927
4928         return orig;
4929 }
4930
4931 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4932 {
4933         u32 tmp;
4934
4935         tmp = RREG32(RLC_CNTL);
4936         if (tmp != rlc)
4937                 WREG32(RLC_CNTL, rlc);
4938 }
4939
4940 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4941 {
4942         u32 data, orig;
4943
4944         orig = data = RREG32(DMA_PG);
4945         if (enable)
4946                 data |= PG_CNTL_ENABLE;
4947         else
4948                 data &= ~PG_CNTL_ENABLE;
4949         if (orig != data)
4950                 WREG32(DMA_PG, data);
4951 }
4952
4953 static void si_init_dma_pg(struct radeon_device *rdev)
4954 {
4955         u32 tmp;
4956
4957         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4958         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4959
4960         for (tmp = 0; tmp < 5; tmp++)
4961                 WREG32(DMA_PGFSM_WRITE, 0);
4962 }
4963
4964 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4965                                bool enable)
4966 {
4967         u32 tmp;
4968
4969         if (enable) {
4970                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4971                 WREG32(RLC_TTOP_D, tmp);
4972
4973                 tmp = RREG32(RLC_PG_CNTL);
4974                 tmp |= GFX_PG_ENABLE;
4975                 WREG32(RLC_PG_CNTL, tmp);
4976
4977                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4978                 tmp |= AUTO_PG_EN;
4979                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4980         } else {
4981                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4982                 tmp &= ~AUTO_PG_EN;
4983                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4984
4985                 tmp = RREG32(DB_RENDER_CONTROL);
4986         }
4987 }
4988
4989 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4990 {
4991         u32 tmp;
4992
4993         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4994
4995         tmp = RREG32(RLC_PG_CNTL);
4996         tmp |= GFX_PG_SRC;
4997         WREG32(RLC_PG_CNTL, tmp);
4998
4999         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5000
5001         tmp = RREG32(RLC_AUTO_PG_CTRL);
5002
5003         tmp &= ~GRBM_REG_SGIT_MASK;
5004         tmp |= GRBM_REG_SGIT(0x700);
5005         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5006         WREG32(RLC_AUTO_PG_CTRL, tmp);
5007 }
5008
5009 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5010 {
5011         u32 mask = 0, tmp, tmp1;
5012         int i;
5013
5014         si_select_se_sh(rdev, se, sh);
5015         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5016         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5017         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5018
5019         tmp &= 0xffff0000;
5020
5021         tmp |= tmp1;
5022         tmp >>= 16;
5023
5024         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5025                 mask <<= 1;
5026                 mask |= 1;
5027         }
5028
5029         return (~tmp) & mask;
5030 }
5031
5032 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5033 {
5034         u32 i, j, k, active_cu_number = 0;
5035         u32 mask, counter, cu_bitmap;
5036         u32 tmp = 0;
5037
5038         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5039                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5040                         mask = 1;
5041                         cu_bitmap = 0;
5042                         counter  = 0;
5043                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5044                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5045                                         if (counter < 2)
5046                                                 cu_bitmap |= mask;
5047                                         counter++;
5048                                 }
5049                                 mask <<= 1;
5050                         }
5051
5052                         active_cu_number += counter;
5053                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5054                 }
5055         }
5056
5057         WREG32(RLC_PG_AO_CU_MASK, tmp);
5058
5059         tmp = RREG32(RLC_MAX_PG_CU);
5060         tmp &= ~MAX_PU_CU_MASK;
5061         tmp |= MAX_PU_CU(active_cu_number);
5062         WREG32(RLC_MAX_PG_CU, tmp);
5063 }
5064
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block.  The enable path halts the
 * RLC, broadcasts a serdes write to all masters, waits for the serdes
 * to idle, and restores the RLC before flipping the enable bits —
 * keep the register ordering as-is.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	/* gui-idle interrupt follows the gating state */
	si_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* quiesce the RLC while the serdes are programmed */
		tmp = si_halt_rlc(rdev);

		/* broadcast the serdes write to every master */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the RLC to its previous state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		/* NOTE(review): repeated reads presumably flush pending CB
		 * clock-gating activity before disabling — confirm.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write when the control value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5102
/* Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * including CP memory light sleep and the CGTS shader-complex gating.
 * Both paths end by halting the RLC, broadcasting a serdes write to
 * all masters, and restoring the RLC — keep the ordering as-is.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		/* shader-complex gating configuration */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light sleep on */
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		/* clear the low six MGCG override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* serdes broadcast for the enable sequence */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to defeat MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* CP memory light sleep off */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		/* serdes broadcast for the disable sequence */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5156
5157 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5158                                bool enable)
5159 {
5160         u32 orig, data, tmp;
5161
5162         if (enable) {
5163                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5164                 tmp |= 0x3fff;
5165                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5166
5167                 orig = data = RREG32(UVD_CGC_CTRL);
5168                 data |= DCM;
5169                 if (orig != data)
5170                         WREG32(UVD_CGC_CTRL, data);
5171
5172                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5173                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5174         } else {
5175                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5176                 tmp &= ~0x3fff;
5177                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5178
5179                 orig = data = RREG32(UVD_CGC_CTRL);
5180                 data &= ~DCM;
5181                 if (orig != data)
5182                         WREG32(UVD_CGC_CTRL, data);
5183
5184                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5185                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5186         }
5187 }
5188
/* Memory-controller / VM hub clock-gating control registers that all
 * take the MC_LS_ENABLE bit; iterated by si_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5201
5202 static void si_enable_mc_ls(struct radeon_device *rdev,
5203                             bool enable)
5204 {
5205         int i;
5206         u32 orig, data;
5207
5208         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5209                 orig = data = RREG32(mc_cg_registers[i]);
5210                 if (enable)
5211                         data |= MC_LS_ENABLE;
5212                 else
5213                         data &= ~MC_LS_ENABLE;
5214                 if (data != orig)
5215                         WREG32(mc_cg_registers[i], data);
5216         }
5217 }
5218
5219
/* Establish the baseline clock-gating state: medium-grain CG on,
 * coarse-grain CG off, MC light sleep forced off on Tahiti, and UVD
 * gating set up when a UVD block is present.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_enable_mgcg(rdev, true);
	si_enable_cgcg(rdev, false);
	/* disable MC LS on Tahiti */
	if (rdev->family == CHIP_TAHITI)
		si_enable_mc_ls(rdev, false);
	if (rdev->has_uvd) {
		si_enable_uvd_mgcg(rdev, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5232
/* Tear down clock gating, roughly reversing si_init_cg(): UVD gating
 * first, then coarse-grain and medium-grain CG.
 */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd)
		si_enable_uvd_mgcg(rdev, false);
	si_enable_cgcg(rdev, false);
	si_enable_mgcg(rdev, false);
}
5240
/* Set up power gating.  The Cape Verde check below is compiled out
 * with #if 0, so has_pg is always false and only the RLC save/restore
 * and clear-state buffer base addresses get programmed.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;
#if 0
	/* only cape verde supports PG */
	if (rdev->family == CHIP_VERDE)
		has_pg = true;
#endif
	if (has_pg) {
		/* dead while the check above is compiled out */
		si_init_ao_cu_mask(rdev);
		si_init_dma_pg(rdev);
		si_enable_dma_pg(rdev, true);
		si_init_gfx_cgpg(rdev);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5260
/* Tear down power gating on chips that support it.
 *
 * NOTE(review): unlike si_init_pg(), the Cape Verde check here is NOT
 * compiled out, so on Verde this disables PG that init never enabled.
 * Disabling an already-disabled feature should be harmless, but the
 * asymmetry looks unintentional — confirm against si_init_pg().
 */
static void si_fini_pg(struct radeon_device *rdev)
{
	bool has_pg = false;

	/* only cape verde supports PG */
	if (rdev->family == CHIP_VERDE)
		has_pg = true;

	if (has_pg) {
		si_enable_dma_pg(rdev, false);
		si_enable_gfx_cgpg(rdev, false);
	}
}
5274
5275 /*
5276  * RLC
5277  */
5278 void si_rlc_fini(struct radeon_device *rdev)
5279 {
5280         int r;
5281
5282         /* save restore block */
5283         if (rdev->rlc.save_restore_obj) {
5284                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5285                 if (unlikely(r != 0))
5286                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
5287                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
5288                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5289
5290                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
5291                 rdev->rlc.save_restore_obj = NULL;
5292         }
5293
5294         /* clear state block */
5295         if (rdev->rlc.clear_state_obj) {
5296                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5297                 if (unlikely(r != 0))
5298                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
5299                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
5300                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5301
5302                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
5303                 rdev->rlc.clear_state_obj = NULL;
5304         }
5305 }
5306
5307 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5308
/**
 * si_rlc_init - allocate and fill the RLC save/restore and clear-state BOs
 * @rdev: radeon device
 *
 * Creates (if needed), pins in VRAM, and fills two buffer objects:
 * - the save/restore buffer (one GPU page); on Cape Verde it is filled
 *   from verde_rlc_save_restore_register_list
 * - the clear-state buffer, laid out from si_cs_data as: one dword of
 *   upper address bits, then 3 header dwords per register list
 *   (address low, register byte offset, 0x08000000 | byte length),
 *   then the register data, terminated by RLC_CLEAR_STATE_END_MARKER
 *
 * Returns 0 on success or a negative error code; on failure any
 * partially-created objects are released via si_rlc_fini().
 */
int si_rlc_init(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	u32 dws, data, i, j, k, reg_num;
	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
	u64 reg_list_mc_addr;
	const struct cs_section_def *cs_data = si_cs_data;
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* only Cape Verde has register contents to write into the sr buffer */
	if (rdev->family == CHIP_VERDE) {
		r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
		if (r) {
			dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
			si_rlc_fini(rdev);
		/* FIXME: mis-indented, but correctly inside the if (r) block */
		return r;
		}
		/* write the sr buffer */
		dst_ptr = rdev->rlc.sr_ptr;
		for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
			dst_ptr[i] = verde_rlc_save_restore_register_list[i];
		}
		radeon_bo_kunmap(rdev->rlc.save_restore_obj);
	}
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);

	/* clear state block: count the register lists and data dwords */
	reg_list_num = 0;
	dws = 0;
	for (i = 0; cs_data[i].section != NULL; i++) {
		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
			reg_list_num++;
			dws += cs_data[i].section[j].reg_count;
		}
	}
	/* header: 3 dwords per list + hi-address dword + end marker */
	reg_list_blk_index = (3 * reg_list_num + 2);
	dws += reg_list_blk_index;

	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	if (r) {

		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
	if (r) {
		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}
	/* set up the cs buffer */
	dst_ptr = rdev->rlc.cs_ptr;
	reg_list_hdr_blk_index = 0;
	/* data area starts right after the header block */
	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
	data = upper_32_bits(reg_list_mc_addr);
	dst_ptr[reg_list_hdr_blk_index] = data;
	reg_list_hdr_blk_index++;
	for (i = 0; cs_data[i].section != NULL; i++) {
		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
			reg_num = cs_data[i].section[j].reg_count;
			/* header dword 1: low 32 bits of the data address */
			data = reg_list_mc_addr & 0xffffffff;
			dst_ptr[reg_list_hdr_blk_index] = data;
			reg_list_hdr_blk_index++;

			/* header dword 2: register byte offset */
			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
			dst_ptr[reg_list_hdr_blk_index] = data;
			reg_list_hdr_blk_index++;

			/* header dword 3: flag | byte length of the list */
			data = 0x08000000 | (reg_num * 4);
			dst_ptr[reg_list_hdr_blk_index] = data;
			reg_list_hdr_blk_index++;

			/* copy the register values into the data area */
			for (k = 0; k < reg_num; k++) {
				data = cs_data[i].section[j].extent[k];
				dst_ptr[reg_list_blk_index + k] = data;
			}
			reg_list_mc_addr += reg_num * 4;
			reg_list_blk_index += reg_num;
		}
	}
	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;

	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);

	return 0;
}
5437
5438 static void si_rlc_reset(struct radeon_device *rdev)
5439 {
5440         u32 tmp = RREG32(GRBM_SOFT_RESET);
5441
5442         tmp |= SOFT_RESET_RLC;
5443         WREG32(GRBM_SOFT_RESET, tmp);
5444         udelay(50);
5445         tmp &= ~SOFT_RESET_RLC;
5446         WREG32(GRBM_SOFT_RESET, tmp);
5447         udelay(50);
5448 }
5449
/* Stop the RLC: clear RLC_CNTL, mask the gui-idle interrupt, then
 * wait for the serdes masters to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5458
/* Start the RLC and re-enable the gui-idle interrupt.  The trailing
 * 50us delay presumably lets the RLC come up before further register
 * access (it matches si_rlc_reset()'s settle delays).
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5467
5468 static bool si_lbpw_supported(struct radeon_device *rdev)
5469 {
5470         u32 tmp;
5471
5472         /* Enable LBPW only for DDR3 */
5473         tmp = RREG32(MC_SEQ_MISC0);
5474         if ((tmp & 0xF0000000) == 0xB0000000)
5475                 return true;
5476         return false;
5477 }
5478
5479 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5480 {
5481         u32 tmp;
5482
5483         tmp = RREG32(RLC_LB_CNTL);
5484         if (enable)
5485                 tmp |= LOAD_BALANCE_ENABLE;
5486         else
5487                 tmp &= ~LOAD_BALANCE_ENABLE;
5488         WREG32(RLC_LB_CNTL, tmp);
5489
5490         if (!enable) {
5491                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5492                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5493         }
5494 }
5495
/* Bring up the RLC: stop and soft-reset it, program the PG/CG state,
 * clear the load-balancing and ucode control registers, upload the
 * RLC microcode, then start the RLC.  Returns -EINVAL when no RLC
 * firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* reset load-balancing state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the big-endian firmware image one dword at a time */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	/* LBPW only when the memory type supports it */
	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5535
5536 static void si_enable_interrupts(struct radeon_device *rdev)
5537 {
5538         u32 ih_cntl = RREG32(IH_CNTL);
5539         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5540
5541         ih_cntl |= ENABLE_INTR;
5542         ih_rb_cntl |= IH_RB_ENABLE;
5543         WREG32(IH_CNTL, ih_cntl);
5544         WREG32(IH_RB_CNTL, ih_rb_cntl);
5545         rdev->ih.enabled = true;
5546 }
5547
5548 static void si_disable_interrupts(struct radeon_device *rdev)
5549 {
5550         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5551         u32 ih_cntl = RREG32(IH_CNTL);
5552
5553         ih_rb_cntl &= ~IH_RB_ENABLE;
5554         ih_cntl &= ~ENABLE_INTR;
5555         WREG32(IH_RB_CNTL, ih_rb_cntl);
5556         WREG32(IH_CNTL, ih_cntl);
5557         /* set rptr, wptr to 0 */
5558         WREG32(IH_RB_RPTR, 0);
5559         WREG32(IH_RB_WPTR, 0);
5560         rdev->ih.enabled = false;
5561         rdev->ih.rptr = 0;
5562 }
5563
/* Force every interrupt source this driver manages into the disabled
 * state: CP rings, both DMA engines, GRBM, per-crtc vblank/vline and
 * pageflip sources, and (on chips with a display block) the DAC and
 * HPD pads.  HPD polarity bits are preserved while the enable bits
 * are cleared.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* CP rings: ring0 keeps only the context busy/empty bits */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* both DMA engines: clear the trap enable */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* per-crtc display interrupt masks */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* per-crtc pageflip (grph) interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* display-less parts (Hainan) have no DAC/HPD registers */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep the HPD polarity bits, drop everything else */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5619
/* One-time interrupt handler (IH) setup: allocate the IH ring, bring
 * up the RLC, program the ring base/size and the optional writeback
 * location, set IH_CNTL defaults, force all interrupt sources off and
 * finally enable the IH.  Returns 0 on success or a negative error
 * code (the IH ring is freed if RLC bring-up fails).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5690
5691 int si_irq_set(struct radeon_device *rdev)
5692 {
5693         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5694         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5695         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5696         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5697         u32 grbm_int_cntl = 0;
5698         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5699         u32 dma_cntl, dma_cntl1;
5700         u32 thermal_int = 0;
5701
5702         if (!rdev->irq.installed) {
5703                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5704                 return -EINVAL;
5705         }
5706         /* don't enable anything if the ih is disabled */
5707         if (!rdev->ih.enabled) {
5708                 si_disable_interrupts(rdev);
5709                 /* force the active interrupt state to all disabled */
5710                 si_disable_interrupt_state(rdev);
5711                 return 0;
5712         }
5713
5714         if (!ASIC_IS_NODCE(rdev)) {
5715                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5716                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5717                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5718                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5719                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5720                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5721         }
5722
5723         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5724         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5725
5726         thermal_int = RREG32(CG_THERMAL_INT) &
5727                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5728
5729         /* enable CP interrupts on all rings */
5730         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5731                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5732                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5733         }
5734         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5735                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5736                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5737         }
5738         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5739                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5740                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5741         }
5742         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5743                 DRM_DEBUG("si_irq_set: sw int dma\n");
5744                 dma_cntl |= TRAP_ENABLE;
5745         }
5746
5747         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5748                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5749                 dma_cntl1 |= TRAP_ENABLE;
5750         }
5751         if (rdev->irq.crtc_vblank_int[0] ||
5752             atomic_read(&rdev->irq.pflip[0])) {
5753                 DRM_DEBUG("si_irq_set: vblank 0\n");
5754                 crtc1 |= VBLANK_INT_MASK;
5755         }
5756         if (rdev->irq.crtc_vblank_int[1] ||
5757             atomic_read(&rdev->irq.pflip[1])) {
5758                 DRM_DEBUG("si_irq_set: vblank 1\n");
5759                 crtc2 |= VBLANK_INT_MASK;
5760         }
5761         if (rdev->irq.crtc_vblank_int[2] ||
5762             atomic_read(&rdev->irq.pflip[2])) {
5763                 DRM_DEBUG("si_irq_set: vblank 2\n");
5764                 crtc3 |= VBLANK_INT_MASK;
5765         }
5766         if (rdev->irq.crtc_vblank_int[3] ||
5767             atomic_read(&rdev->irq.pflip[3])) {
5768                 DRM_DEBUG("si_irq_set: vblank 3\n");
5769                 crtc4 |= VBLANK_INT_MASK;
5770         }
5771         if (rdev->irq.crtc_vblank_int[4] ||
5772             atomic_read(&rdev->irq.pflip[4])) {
5773                 DRM_DEBUG("si_irq_set: vblank 4\n");
5774                 crtc5 |= VBLANK_INT_MASK;
5775         }
5776         if (rdev->irq.crtc_vblank_int[5] ||
5777             atomic_read(&rdev->irq.pflip[5])) {
5778                 DRM_DEBUG("si_irq_set: vblank 5\n");
5779                 crtc6 |= VBLANK_INT_MASK;
5780         }
5781         if (rdev->irq.hpd[0]) {
5782                 DRM_DEBUG("si_irq_set: hpd 1\n");
5783                 hpd1 |= DC_HPDx_INT_EN;
5784         }
5785         if (rdev->irq.hpd[1]) {
5786                 DRM_DEBUG("si_irq_set: hpd 2\n");
5787                 hpd2 |= DC_HPDx_INT_EN;
5788         }
5789         if (rdev->irq.hpd[2]) {
5790                 DRM_DEBUG("si_irq_set: hpd 3\n");
5791                 hpd3 |= DC_HPDx_INT_EN;
5792         }
5793         if (rdev->irq.hpd[3]) {
5794                 DRM_DEBUG("si_irq_set: hpd 4\n");
5795                 hpd4 |= DC_HPDx_INT_EN;
5796         }
5797         if (rdev->irq.hpd[4]) {
5798                 DRM_DEBUG("si_irq_set: hpd 5\n");
5799                 hpd5 |= DC_HPDx_INT_EN;
5800         }
5801         if (rdev->irq.hpd[5]) {
5802                 DRM_DEBUG("si_irq_set: hpd 6\n");
5803                 hpd6 |= DC_HPDx_INT_EN;
5804         }
5805
5806         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5807         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5808         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5809
5810         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5811         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5812
5813         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5814
5815         if (rdev->irq.dpm_thermal) {
5816                 DRM_DEBUG("dpm thermal\n");
5817                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5818         }
5819
5820         if (rdev->num_crtc >= 2) {
5821                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5822                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5823         }
5824         if (rdev->num_crtc >= 4) {
5825                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5826                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5827         }
5828         if (rdev->num_crtc >= 6) {
5829                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5830                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5831         }
5832
5833         if (rdev->num_crtc >= 2) {
5834                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5835                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5836         }
5837         if (rdev->num_crtc >= 4) {
5838                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5839                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5840         }
5841         if (rdev->num_crtc >= 6) {
5842                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5843                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5844         }
5845
5846         if (!ASIC_IS_NODCE(rdev)) {
5847                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5848                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5849                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5850                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5851                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5852                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5853         }
5854
5855         WREG32(CG_THERMAL_INT, thermal_int);
5856
5857         return 0;
5858 }
5859
5860 static inline void si_irq_ack(struct radeon_device *rdev)
5861 {
5862         u32 tmp;
5863
5864         if (ASIC_IS_NODCE(rdev))
5865                 return;
5866
5867         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5868         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5869         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5870         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5871         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5872         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5873         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5874         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5875         if (rdev->num_crtc >= 4) {
5876                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5877                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5878         }
5879         if (rdev->num_crtc >= 6) {
5880                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5881                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5882         }
5883
5884         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5885                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5886         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5887                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5888         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5889                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5890         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5891                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5892         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5893                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5894         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5895                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5896
5897         if (rdev->num_crtc >= 4) {
5898                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5899                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5900                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5901                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5902                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5903                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5904                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5905                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5906                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5907                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5908                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5909                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5910         }
5911
5912         if (rdev->num_crtc >= 6) {
5913                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5914                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5915                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5916                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5917                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5918                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5919                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5920                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5921                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5922                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5923                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5924                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5925         }
5926
5927         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5928                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5929                 tmp |= DC_HPDx_INT_ACK;
5930                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5931         }
5932         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5933                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5934                 tmp |= DC_HPDx_INT_ACK;
5935                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5936         }
5937         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5938                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5939                 tmp |= DC_HPDx_INT_ACK;
5940                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5941         }
5942         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5943                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5944                 tmp |= DC_HPDx_INT_ACK;
5945                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5946         }
5947         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5948                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5949                 tmp |= DC_HPDx_INT_ACK;
5950                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5951         }
5952         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5953                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5954                 tmp |= DC_HPDx_INT_ACK;
5955                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5956         }
5957 }
5958
/* Disable all interrupts: stop the IH controller, then wait briefly so any
 * in-flight interrupt can latch, ack whatever is pending, and force every
 * interrupt source register back to its disabled state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5967
/* Suspend-time teardown: disable/ack all interrupts and stop the RLC so no
 * interrupt activity continues while the device is suspended.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5973
/* Final teardown: perform the suspend sequence, then free the IH ring
 * buffer resources.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5979
5980 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5981 {
5982         u32 wptr, tmp;
5983
5984         if (rdev->wb.enabled)
5985                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5986         else
5987                 wptr = RREG32(IH_RB_WPTR);
5988
5989         if (wptr & RB_OVERFLOW) {
5990                 /* When a ring buffer overflow happen start parsing interrupt
5991                  * from the last not overwritten vector (wptr + 16). Hopefully
5992                  * this should allow us to catchup.
5993                  */
5994                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5995                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5996                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5997                 tmp = RREG32(IH_RB_CNTL);
5998                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5999                 WREG32(IH_RB_CNTL, tmp);
6000         }
6001         return (wptr & rdev->ih.ptr_mask);
6002 }
6003
6004 /*        SI IV Ring
6005  * Each IV ring entry is 128 bits:
6006  * [7:0]    - interrupt source id
6007  * [31:8]   - reserved
6008  * [59:32]  - interrupt source data
6009  * [63:60]  - reserved
6010  * [71:64]  - RINGID
6011  * [79:72]  - VMID
6012  * [127:80] - reserved
6013  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring between rptr and wptr, decode each 128-bit IV ring
 * entry (see the layout comment above) and dispatch it: vblank/vline and
 * hotplug display events, VM protection faults, CP/DMA fence interrupts
 * and thermal events.  Hotplug and thermal handling is deferred to
 * work queues.  Returns IRQ_HANDLED if any work was done, IRQ_NONE if
 * interrupts are disabled or another thread holds the IH lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection fault */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	/* deferred work: hotplug and thermal handling can sleep */
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6337
6338 /**
6339  * si_copy_dma - copy pages using the DMA engine
6340  *
6341  * @rdev: radeon_device pointer
6342  * @src_offset: src GPU address
6343  * @dst_offset: dst GPU address
6344  * @num_gpu_pages: number of GPU pages to xfer
6345  * @fence: radeon fence object
6346  *
6347  * Copy GPU paging using the DMA engine (SI).
6348  * Used by the radeon ttm implementation to move pages if
6349  * registered as the asic copy callback.
6350  */
6351 int si_copy_dma(struct radeon_device *rdev,
6352                 uint64_t src_offset, uint64_t dst_offset,
6353                 unsigned num_gpu_pages,
6354                 struct radeon_fence **fence)
6355 {
6356         struct radeon_semaphore *sem = NULL;
6357         int ring_index = rdev->asic->copy.dma_ring_index;
6358         struct radeon_ring *ring = &rdev->ring[ring_index];
6359         u32 size_in_bytes, cur_size_in_bytes;
6360         int i, num_loops;
6361         int r = 0;
6362
6363         r = radeon_semaphore_create(rdev, &sem);
6364         if (r) {
6365                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6366                 return r;
6367         }
6368
6369         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6370         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6371         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6372         if (r) {
6373                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6374                 radeon_semaphore_free(rdev, &sem, NULL);
6375                 return r;
6376         }
6377
6378         if (radeon_fence_need_sync(*fence, ring->idx)) {
6379                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6380                                             ring->idx);
6381                 radeon_fence_note_sync(*fence, ring->idx);
6382         } else {
6383                 radeon_semaphore_free(rdev, &sem, NULL);
6384         }
6385
6386         for (i = 0; i < num_loops; i++) {
6387                 cur_size_in_bytes = size_in_bytes;
6388                 if (cur_size_in_bytes > 0xFFFFF)
6389                         cur_size_in_bytes = 0xFFFFF;
6390                 size_in_bytes -= cur_size_in_bytes;
6391                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6392                 radeon_ring_write(ring, dst_offset & 0xffffffff);
6393                 radeon_ring_write(ring, src_offset & 0xffffffff);
6394                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6395                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6396                 src_offset += cur_size_in_bytes;
6397                 dst_offset += cur_size_in_bytes;
6398         }
6399
6400         r = radeon_fence_emit(rdev, fence, ring->idx);
6401         if (r) {
6402                 radeon_ring_unlock_undo(rdev, ring);
6403                 return r;
6404         }
6405
6406         radeon_ring_unlock_commit(rdev, ring);
6407         radeon_semaphore_free(rdev, &sem, *fence);
6408
6409         return r;
6410 }
6411
6412 /*
6413  * startup/shutdown callbacks
6414  */
/**
 * si_startup - bring the hw up to an operational state (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC, loads microcode, enables the GART, allocates the
 * RLC and write-back buffers, starts the fence driver on every ring
 * that will be used, sets up interrupts and finally initializes the
 * CP, DMA and (optionally) UVD rings, the IB pool and the VM manager.
 * The order of the steps below is significant: later stages depend
 * on earlier ones (e.g. rings need fences, fences need write-back).
 * Called from si_init() and si_resume().
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	si_mc_program(rdev);

	/* fetch any ucode images that are still missing (first start,
	 * or a previous request_firmware failed)
	 */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on each ring we will use below:
	 * gfx, two compute rings and the two DMA engines
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failures are not fatal: a zero ring_size tells the code
	 * further down to simply skip the UVD ring
	 */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	/* the DMA rings use a different nop packet and alignment than
	 * the CP rings, hence the different radeon_ring_init arguments
	 */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size is 0 here when UVD setup failed above; a UVD init
	 * failure is only logged, not propagated
	 */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
6596
6597 int si_resume(struct radeon_device *rdev)
6598 {
6599         int r;
6600
6601         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6602          * posting will perform necessary task to bring back GPU into good
6603          * shape.
6604          */
6605         /* post card */
6606         atom_asic_init(rdev->mode_info.atom_context);
6607
6608         /* init golden registers */
6609         si_init_golden_registers(rdev);
6610
6611         rdev->accel_working = true;
6612         r = si_startup(rdev);
6613         if (r) {
6614                 DRM_ERROR("si startup failed on resume\n");
6615                 rdev->accel_working = false;
6616                 return r;
6617         }
6618
6619         return r;
6620
6621 }
6622
/**
 * si_suspend - asic suspend callback (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the hw before suspend: tears down VM handling, stops the
 * CP and DMA engines, stops UVD if present, suspends interrupts,
 * disables write-back and finally disables the GART.  The order
 * matters: the engines must be idle before the GART below them is
 * torn down.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD block before saving its state */
		r600_uvd_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6637
6638 /* Plan is to move initialization in that function and use
6639  * helper function so that radeon_device_init pretty much
6640  * do nothing more than calling asic specific function. This
6641  * should also allow to remove a bunch of callback function
6642  * like vram_info.
6643  */
6644 int si_init(struct radeon_device *rdev)
6645 {
6646         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6647         int r;
6648
6649         /* Read BIOS */
6650         if (!radeon_get_bios(rdev)) {
6651                 if (ASIC_IS_AVIVO(rdev))
6652                         return -EINVAL;
6653         }
6654         /* Must be an ATOMBIOS */
6655         if (!rdev->is_atom_bios) {
6656                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6657                 return -EINVAL;
6658         }
6659         r = radeon_atombios_init(rdev);
6660         if (r)
6661                 return r;
6662
6663         /* Post card if necessary */
6664         if (!radeon_card_posted(rdev)) {
6665                 if (!rdev->bios) {
6666                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6667                         return -EINVAL;
6668                 }
6669                 DRM_INFO("GPU not posted. posting now...\n");
6670                 atom_asic_init(rdev->mode_info.atom_context);
6671         }
6672         /* init golden registers */
6673         si_init_golden_registers(rdev);
6674         /* Initialize scratch registers */
6675         si_scratch_init(rdev);
6676         /* Initialize surface registers */
6677         radeon_surface_init(rdev);
6678         /* Initialize clocks */
6679         radeon_get_clock_info(rdev->ddev);
6680
6681         /* Fence driver */
6682         r = radeon_fence_driver_init(rdev);
6683         if (r)
6684                 return r;
6685
6686         /* initialize memory controller */
6687         r = si_mc_init(rdev);
6688         if (r)
6689                 return r;
6690         /* Memory manager */
6691         r = radeon_bo_init(rdev);
6692         if (r)
6693                 return r;
6694
6695         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6696         ring->ring_obj = NULL;
6697         r600_ring_init(rdev, ring, 1024 * 1024);
6698
6699         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6700         ring->ring_obj = NULL;
6701         r600_ring_init(rdev, ring, 1024 * 1024);
6702
6703         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6704         ring->ring_obj = NULL;
6705         r600_ring_init(rdev, ring, 1024 * 1024);
6706
6707         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6708         ring->ring_obj = NULL;
6709         r600_ring_init(rdev, ring, 64 * 1024);
6710
6711         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6712         ring->ring_obj = NULL;
6713         r600_ring_init(rdev, ring, 64 * 1024);
6714
6715         if (rdev->has_uvd) {
6716                 r = radeon_uvd_init(rdev);
6717                 if (!r) {
6718                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6719                         ring->ring_obj = NULL;
6720                         r600_ring_init(rdev, ring, 4096);
6721                 }
6722         }
6723
6724         rdev->ih.ring_obj = NULL;
6725         r600_ih_ring_init(rdev, 64 * 1024);
6726
6727         r = r600_pcie_gart_init(rdev);
6728         if (r)
6729                 return r;
6730
6731         rdev->accel_working = true;
6732         r = si_startup(rdev);
6733         if (r) {
6734                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6735                 si_cp_fini(rdev);
6736                 cayman_dma_fini(rdev);
6737                 si_irq_fini(rdev);
6738                 si_rlc_fini(rdev);
6739                 radeon_wb_fini(rdev);
6740                 radeon_ib_pool_fini(rdev);
6741                 radeon_vm_manager_fini(rdev);
6742                 radeon_irq_kms_fini(rdev);
6743                 si_pcie_gart_fini(rdev);
6744                 rdev->accel_working = false;
6745         }
6746
6747         /* Don't start up if the MC ucode is missing.
6748          * The default clocks and voltages before the MC ucode
6749          * is loaded are not suffient for advanced operations.
6750          */
6751         if (!rdev->mc_fw) {
6752                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6753                 return -EINVAL;
6754         }
6755
6756         return 0;
6757 }
6758
/**
 * si_fini - tear down driver state (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Tears the asic down roughly in the reverse order of si_init():
 * rings first, then interrupts, clock/power gating, write-back, VM,
 * IB pool, IRQ handling, UVD, GART, VRAM scratch, GEM, the fence
 * driver, the BO manager, the atombios context and finally the BIOS
 * copy itself.  Called at driver unload.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	si_fini_cg(rdev);
	si_fini_pg(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD block before releasing its objects */
		r600_uvd_stop(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6784
6785 /**
6786  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6787  *
6788  * @rdev: radeon_device pointer
6789  *
6790  * Fetches a GPU clock counter snapshot (SI).
6791  * Returns the 64 bit clock counter snapshot.
6792  */
6793 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6794 {
6795         uint64_t clock;
6796
6797         mutex_lock(&rdev->gpu_clock_mutex);
6798         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6799         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6800                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6801         mutex_unlock(&rdev->gpu_clock_mutex);
6802         return clock;
6803 }
6804
/**
 * si_set_uvd_clocks - program the UPLL feeding the UVD clocks (SI)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock; 0 (together with @dclk == 0)
 *        leaves the PLL in bypass and puts it to sleep
 * @dclk: requested UVD decoder clock
 *
 * Follows the hw programming sequence: bypass both clocks with bclk,
 * park the PLL, compute the dividers, program them, let the PLL
 * settle, then switch back to normal mode and reselect the clock
 * sources.  The exact register write order matters; do not reorder.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* search for suitable feedback/post dividers; the numeric
	 * constants are the divider search constraints — presumably
	 * min/max PLL frequencies and divider ranges; verify against
	 * radeon_uvd_calc_upll_dividers()
	 */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit depends on the resulting feedback divider —
	 * NOTE(review): 307200 threshold taken as-is; meaning not
	 * derivable from this file
	 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6895
/**
 * si_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks what speeds the platform supports, optionally re-runs gen3
 * link equalization, then requests a speed change through the target
 * link speed field of the GPU's LNKCTL2 register and the port's
 * LC_SPEED_CNTL register.  Does nothing for IGP or non-PCIE parts,
 * or when the radeon.pcie_gen2 module parameter is 0.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* data rate encoding: 0 = gen1, 1 = gen2, 2 = gen3 */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the LNKCTL values of both link partners so
			 * their HAWD bits can be restored after the retries
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected width first if
			 * the link currently runs narrower
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries; stop early once no
			 * transactions are pending on the GPU
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request a redo of
				 * equalization via the LC port registers
				 */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 — restore bit 4 and bits 11:9 from
				 * the saved values; NOTE(review): presumably
				 * the compliance/transmit-margin fields of
				 * LNKCTL2, verify against the PCIe spec
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the initiate bit, i.e. the speed
	 * change has been picked up
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7052
7053 static void si_program_aspm(struct radeon_device *rdev)
7054 {
7055         u32 data, orig;
7056         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7057         bool disable_clkreq = false;
7058
7059         if (radeon_aspm == 0)
7060                 return;
7061
7062         if (!(rdev->flags & RADEON_IS_PCIE))
7063                 return;
7064
7065         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7066         data &= ~LC_XMIT_N_FTS_MASK;
7067         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7068         if (orig != data)
7069                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7070
7071         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7072         data |= LC_GO_TO_RECOVERY;
7073         if (orig != data)
7074                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7075
7076         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7077         data |= P_IGNORE_EDB_ERR;
7078         if (orig != data)
7079                 WREG32_PCIE(PCIE_P_CNTL, data);
7080
7081         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7082         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7083         data |= LC_PMI_TO_L1_DIS;
7084         if (!disable_l0s)
7085                 data |= LC_L0S_INACTIVITY(7);
7086
7087         if (!disable_l1) {
7088                 data |= LC_L1_INACTIVITY(7);
7089                 data &= ~LC_PMI_TO_L1_DIS;
7090                 if (orig != data)
7091                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7092
7093                 if (!disable_plloff_in_l1) {
7094                         bool clk_req_support;
7095
7096                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7097                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7098                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7099                         if (orig != data)
7100                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7101
7102                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7103                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7104                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7105                         if (orig != data)
7106                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7107
7108                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7109                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7110                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7111                         if (orig != data)
7112                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7113
7114                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7115                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7116                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7117                         if (orig != data)
7118                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7119
7120                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7121                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7122                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7123                                 if (orig != data)
7124                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7125
7126                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7127                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7128                                 if (orig != data)
7129                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7130
7131                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7132                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7133                                 if (orig != data)
7134                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7135
7136                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7137                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7138                                 if (orig != data)
7139                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7140
7141                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7142                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7143                                 if (orig != data)
7144                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7145
7146                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7147                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7148                                 if (orig != data)
7149                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7150
7151                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7152                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7153                                 if (orig != data)
7154                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7155
7156                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7157                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7158                                 if (orig != data)
7159                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7160                         }
7161                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7162                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7163                         data |= LC_DYN_LANES_PWR_STATE(3);
7164                         if (orig != data)
7165                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7166
7167                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7168                         data &= ~LS2_EXIT_TIME_MASK;
7169                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7170                                 data |= LS2_EXIT_TIME(5);
7171                         if (orig != data)
7172                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7173
7174                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7175                         data &= ~LS2_EXIT_TIME_MASK;
7176                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7177                                 data |= LS2_EXIT_TIME(5);
7178                         if (orig != data)
7179                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7180
7181                         if (!disable_clkreq) {
7182                                 struct pci_dev *root = rdev->pdev->bus->self;
7183                                 u32 lnkcap;
7184
7185                                 clk_req_support = false;
7186                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7187                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7188                                         clk_req_support = true;
7189                         } else {
7190                                 clk_req_support = false;
7191                         }
7192
7193                         if (clk_req_support) {
7194                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7195                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7196                                 if (orig != data)
7197                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7198
7199                                 orig = data = RREG32(THM_CLK_CNTL);
7200                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7201                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7202                                 if (orig != data)
7203                                         WREG32(THM_CLK_CNTL, data);
7204
7205                                 orig = data = RREG32(MISC_CLK_CNTL);
7206                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7207                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7208                                 if (orig != data)
7209                                         WREG32(MISC_CLK_CNTL, data);
7210
7211                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7212                                 data &= ~BCLK_AS_XCLK;
7213                                 if (orig != data)
7214                                         WREG32(CG_CLKPIN_CNTL, data);
7215
7216                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7217                                 data &= ~FORCE_BIF_REFCLK_EN;
7218                                 if (orig != data)
7219                                         WREG32(CG_CLKPIN_CNTL_2, data);
7220
7221                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7222                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7223                                 data |= MPLL_CLKOUT_SEL(4);
7224                                 if (orig != data)
7225                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7226
7227                                 orig = data = RREG32(SPLL_CNTL_MODE);
7228                                 data &= ~SPLL_REFCLK_SEL_MASK;
7229                                 if (orig != data)
7230                                         WREG32(SPLL_CNTL_MODE, data);
7231                         }
7232                 }
7233         } else {
7234                 if (orig != data)
7235                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7236         }
7237
7238         orig = data = RREG32_PCIE(PCIE_CNTL2);
7239         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7240         if (orig != data)
7241                 WREG32_PCIE(PCIE_CNTL2, data);
7242
7243         if (!disable_l0s) {
7244                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7245                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7246                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7247                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7248                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7249                                 data &= ~LC_L0S_INACTIVITY_MASK;
7250                                 if (orig != data)
7251                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7252                         }
7253                 }
7254         }
7255 }