/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"


MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_dma_vm_set_page(struct radeon_device *rdev,
                               struct radeon_ib *ib,
                               uint64_t pe,
                               uint64_t addr, unsigned count,
                               uint32_t incr, uint32_t flags);

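/*
 * RLC save/restore register list.  Encoding assumed from how the list is
 * consumed by the RLC save/restore setup: each entry pair appears to carry a
 * GRBM_GFX_INDEX-style instance select in the upper 16 bits and a register
 * dword offset in the lower 16 bits, followed by a placeholder data word.
 */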
static const u32 verde_rlc_save_restore_register_list[] =
{
        (0x8000 << 16) | (0x98f4 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x98f4 >> 2),
        0x00000000,
        (0x8000 << 16) | (0xe80 >> 2),
        0x00000000,
        (0x8040 << 16) | (0xe80 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x89bc >> 2),
        0x00000000,
        (0x8040 << 16) | (0x89bc >> 2),
        0x00000000,
        (0x8000 << 16) | (0x8c1c >> 2),
        0x00000000,
        (0x8040 << 16) | (0x8c1c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x98f0 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xe7c >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9148 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9148 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9150 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x897c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8d8c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac54 >> 2),
        0x00000000,
        0x3,
        (0x9c00 << 16) | (0x98f8 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9910 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9914 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9918 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x991c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9920 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9924 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9928 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x992c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9930 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9934 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9938 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x993c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9940 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9944 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9948 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x994c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9950 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9954 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9958 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x995c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9960 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9964 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9968 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x996c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9970 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9974 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9978 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x997c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9980 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9984 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9988 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x998c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c00 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c14 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c04 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c08 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9b7c >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9b7c >> 2),
        0x00000000,
        (0x8000 << 16) | (0xe84 >> 2),
        0x00000000,
        (0x8040 << 16) | (0xe84 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x89c0 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x89c0 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x914c >> 2),
        0x00000000,
        (0x8040 << 16) | (0x914c >> 2),
        0x00000000,
        (0x8000 << 16) | (0x8c20 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x8c20 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9354 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9354 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9060 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9364 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9100 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x913c >> 2),
        0x00000000,
        (0x8000 << 16) | (0x90e0 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x90e4 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x90e8 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x90e0 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x90e4 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x90e8 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8bcc >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8b24 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88c4 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8e50 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c0c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8e58 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8e5c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9508 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x950c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9494 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac0c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac10 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac14 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xae00 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac08 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88d4 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88c8 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88cc >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x89b0 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8b10 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8a14 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9830 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9834 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9838 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9a10 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9874 >> 2),
        0x00000000,
        (0x8001 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8001 << 16) | (0x9874 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9874 >> 2),
        0x00000000,
        (0x8041 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8041 << 16) | (0x9874 >> 2),
        0x00000000,
        0x00000000
};

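/*
 * Golden register tables: {register offset, mask, value} triplets applied by
 * radeon_program_register_sequence() from si_init_golden_registers() below
 * (read-modify-write of the masked bits unless the mask is 0xffffffff).
 */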
static const u32 tahiti_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4,
        0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x277c, 0x00000003, 0x000007ff,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x00000200, 0x000002fb,
        0xac10, 0xffffffff, 0x0000543b,
        0xac0c, 0xffffffff, 0xa9210876,
        0x88d0, 0xffffffff, 0x000fff40,
        0x88d4, 0x0000001f, 0x00000010,
        0x1410, 0x20000000, 0x20fffed8,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
        0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601004,
        0xc47c, 0xffffffff, 0x10102020,
        0xc488, 0xffffffff, 0x01000020,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f7,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x32761054,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x033f1005,
        0xc47c, 0xffffffff, 0x10808020,
        0xc488, 0xffffffff, 0x00800008,
        0xc314, 0xffffffff, 0x00001000,
        0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xd0c0, 0xff000fff, 0x00000100,
        0xd030, 0x000300c0, 0x00800040,
        0xd8c0, 0xff000fff, 0x00000100,
        0xd830, 0x000300c0, 0x00800040,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000000,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x03e00000, 0x03600000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f1,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
        0x98f8, 0xffffffff, 0x02010001
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x91d8, 0xffffffff, 0x00070006,
        0x91dc, 0xffffffff, 0x00090008,
        0x91e0, 0xffffffff, 0x0000000c,
        0x91e4, 0xffffffff, 0x000b000a,
        0x91e8, 0xffffffff, 0x000e000d,
        0x91ec, 0xffffffff, 0x00080007,
        0x91f0, 0xffffffff, 0x000a0009,
        0x91f4, 0xffffffff, 0x0000000d,
        0x91f8, 0xffffffff, 0x000c000b,
        0x91fc, 0xffffffff, 0x000f000e,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9264, 0xffffffff, 0x000e000d,
        0x9268, 0xffffffff, 0x0010000f,
        0x926c, 0xffffffff, 0x00000013,
        0x9270, 0xffffffff, 0x00120011,
        0x9274, 0xffffffff, 0x00150014,
        0x9278, 0xffffffff, 0x000f000e,
        0x927c, 0xffffffff, 0x00110010,
        0x9280, 0xffffffff, 0x00000014,
        0x9284, 0xffffffff, 0x00130012,
        0x9288, 0xffffffff, 0x00160015,
        0x928c, 0xffffffff, 0x0010000f,
        0x9290, 0xffffffff, 0x00120011,
        0x9294, 0xffffffff, 0x00000015,
        0x9298, 0xffffffff, 0x00140013,
        0x929c, 0xffffffff, 0x00170016,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static u32 verde_pg_init[] =
{
        0x353c, 0xffffffff, 0x40000,
        0x3538, 0xffffffff, 0x200010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x7007,
        0x3538, 0xffffffff, 0x300010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x400000,
        0x3538, 0xffffffff, 0x100010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x120200,
        0x3538, 0xffffffff, 0x500010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x1e1e16,
        0x3538, 0xffffffff, 0x600010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x171f1e,
        0x3538, 0xffffffff, 0x700010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x3538, 0xffffffff, 0x9ff,
        0x3500, 0xffffffff, 0x0,
        0x3504, 0xffffffff, 0x10000800,
        0x3504, 0xffffffff, 0xf,
        0x3504, 0xffffffff, 0xf,
        0x3500, 0xffffffff, 0x4,
        0x3504, 0xffffffff, 0x1000051e,
        0x3504, 0xffffffff, 0xffff,
        0x3504, 0xffffffff, 0xffff,
        0x3500, 0xffffffff, 0x8,
        0x3504, 0xffffffff, 0x80500,
        0x3500, 0xffffffff, 0x12,
        0x3504, 0xffffffff, 0x9050c,
        0x3500, 0xffffffff, 0x1d,
        0x3504, 0xffffffff, 0xb052c,
        0x3500, 0xffffffff, 0x2a,
        0x3504, 0xffffffff, 0x1053e,
        0x3500, 0xffffffff, 0x2d,
        0x3504, 0xffffffff, 0x10546,
        0x3500, 0xffffffff, 0x30,
        0x3504, 0xffffffff, 0xa054e,
        0x3500, 0xffffffff, 0x3c,
        0x3504, 0xffffffff, 0x1055f,
        0x3500, 0xffffffff, 0x3f,
        0x3504, 0xffffffff, 0x10567,
        0x3500, 0xffffffff, 0x42,
        0x3504, 0xffffffff, 0x1056f,
        0x3500, 0xffffffff, 0x45,
        0x3504, 0xffffffff, 0x10572,
        0x3500, 0xffffffff, 0x48,
        0x3504, 0xffffffff, 0x20575,
        0x3500, 0xffffffff, 0x4c,
        0x3504, 0xffffffff, 0x190801,
        0x3500, 0xffffffff, 0x67,
        0x3504, 0xffffffff, 0x1082a,
        0x3500, 0xffffffff, 0x6a,
        0x3504, 0xffffffff, 0x1b082d,
        0x3500, 0xffffffff, 0x87,
        0x3504, 0xffffffff, 0x310851,
        0x3500, 0xffffffff, 0xba,
        0x3504, 0xffffffff, 0x891,
        0x3500, 0xffffffff, 0xbc,
        0x3504, 0xffffffff, 0x893,
        0x3500, 0xffffffff, 0xbe,
        0x3504, 0xffffffff, 0x20895,
        0x3500, 0xffffffff, 0xc2,
        0x3504, 0xffffffff, 0x20899,
        0x3500, 0xffffffff, 0xc6,
        0x3504, 0xffffffff, 0x2089d,
        0x3500, 0xffffffff, 0xca,
        0x3504, 0xffffffff, 0x8a1,
        0x3500, 0xffffffff, 0xcc,
        0x3504, 0xffffffff, 0x8a3,
        0x3500, 0xffffffff, 0xce,
        0x3504, 0xffffffff, 0x308a5,
        0x3500, 0xffffffff, 0xd3,
        0x3504, 0xffffffff, 0x6d08cd,
        0x3500, 0xffffffff, 0x142,
        0x3504, 0xffffffff, 0x2000095a,
        0x3504, 0xffffffff, 0x1,
        0x3500, 0xffffffff, 0x144,
        0x3504, 0xffffffff, 0x301f095b,
        0x3500, 0xffffffff, 0x165,
        0x3504, 0xffffffff, 0xc094d,
        0x3500, 0xffffffff, 0x173,
        0x3504, 0xffffffff, 0xf096d,
        0x3500, 0xffffffff, 0x184,
        0x3504, 0xffffffff, 0x15097f,
        0x3500, 0xffffffff, 0x19b,
        0x3504, 0xffffffff, 0xc0998,
        0x3500, 0xffffffff, 0x1a9,
        0x3504, 0xffffffff, 0x409a7,
        0x3500, 0xffffffff, 0x1af,
        0x3504, 0xffffffff, 0xcdc,
        0x3500, 0xffffffff, 0x1b1,
        0x3504, 0xffffffff, 0x800,
        0x3508, 0xffffffff, 0x6c9b2000,
        0x3510, 0xfc00, 0x2000,
        0x3544, 0xffffffff, 0xfc0,
        0x28d4, 0x00000100, 0x100
};

static void si_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_TAHITI:
                radeon_program_register_sequence(rdev,
                                                 tahiti_golden_registers,
                                                 (const u32)ARRAY_SIZE(tahiti_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 tahiti_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 tahiti_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 tahiti_golden_registers2,
                                                 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
                break;
        case CHIP_PITCAIRN:
                radeon_program_register_sequence(rdev,
                                                 pitcairn_golden_registers,
                                                 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 pitcairn_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 pitcairn_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
                break;
        case CHIP_VERDE:
                radeon_program_register_sequence(rdev,
                                                 verde_golden_registers,
                                                 (const u32)ARRAY_SIZE(verde_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 verde_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 verde_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 verde_pg_init,
                                                 (const u32)ARRAY_SIZE(verde_pg_init));
                break;
        case CHIP_OLAND:
                radeon_program_register_sequence(rdev,
                                                 oland_golden_registers,
                                                 (const u32)ARRAY_SIZE(oland_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 oland_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 oland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
                break;
        case CHIP_HAINAN:
                radeon_program_register_sequence(rdev,
                                                 hainan_golden_registers,
                                                 (const u32)ARRAY_SIZE(hainan_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 hainan_golden_registers2,
                                                 (const u32)ARRAY_SIZE(hainan_golden_registers2));
                radeon_program_register_sequence(rdev,
                                                 hainan_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
                break;
        default:
                break;
        }
}

#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
        u32 reference_clock = rdev->clock.spll.reference_freq;
        u32 tmp;

        tmp = RREG32(CG_CLKPIN_CNTL_2);
        if (tmp & MUX_TCLK_TO_XCLK)
                return TCLK;

        tmp = RREG32(CG_CLKPIN_CNTL);
        if (tmp & XTALIN_DIVIDE)
                return reference_clock / 4;

        return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
                CTF_TEMP_SHIFT;

        if (temp & 0x200)
                actual_temp = 255;
        else
                actual_temp = temp & 0x1ff;

        actual_temp = (actual_temp * 1000);

        return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

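/*
 * MC IO register tables: {MC_SEQ_IO_DEBUG index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by si_mc_load_microcode() before
 * the MC ucode itself is loaded.
 */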
1267 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1268         {0x0000006f, 0x03044000},
1269         {0x00000070, 0x0480c018},
1270         {0x00000071, 0x00000040},
1271         {0x00000072, 0x01000000},
1272         {0x00000074, 0x000000ff},
1273         {0x00000075, 0x00143400},
1274         {0x00000076, 0x08ec0800},
1275         {0x00000077, 0x040000cc},
1276         {0x00000079, 0x00000000},
1277         {0x0000007a, 0x21000409},
1278         {0x0000007c, 0x00000000},
1279         {0x0000007d, 0xe8000000},
1280         {0x0000007e, 0x044408a8},
1281         {0x0000007f, 0x00000003},
1282         {0x00000080, 0x00000000},
1283         {0x00000081, 0x01000000},
1284         {0x00000082, 0x02000000},
1285         {0x00000083, 0x00000000},
1286         {0x00000084, 0xe3f3e4f4},
1287         {0x00000085, 0x00052024},
1288         {0x00000087, 0x00000000},
1289         {0x00000088, 0x66036603},
1290         {0x00000089, 0x01000000},
1291         {0x0000008b, 0x1c0a0000},
1292         {0x0000008c, 0xff010000},
1293         {0x0000008e, 0xffffefff},
1294         {0x0000008f, 0xfff3efff},
1295         {0x00000090, 0xfff3efbf},
1296         {0x00000094, 0x00101101},
1297         {0x00000095, 0x00000fff},
1298         {0x00000096, 0x00116fff},
1299         {0x00000097, 0x60010000},
1300         {0x00000098, 0x10010000},
1301         {0x00000099, 0x00006000},
1302         {0x0000009a, 0x00001000},
1303         {0x0000009f, 0x00a77400}
1304 };
1305
1306 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1307         {0x0000006f, 0x03044000},
1308         {0x00000070, 0x0480c018},
1309         {0x00000071, 0x00000040},
1310         {0x00000072, 0x01000000},
1311         {0x00000074, 0x000000ff},
1312         {0x00000075, 0x00143400},
1313         {0x00000076, 0x08ec0800},
1314         {0x00000077, 0x040000cc},
1315         {0x00000079, 0x00000000},
1316         {0x0000007a, 0x21000409},
1317         {0x0000007c, 0x00000000},
1318         {0x0000007d, 0xe8000000},
1319         {0x0000007e, 0x044408a8},
1320         {0x0000007f, 0x00000003},
1321         {0x00000080, 0x00000000},
1322         {0x00000081, 0x01000000},
1323         {0x00000082, 0x02000000},
1324         {0x00000083, 0x00000000},
1325         {0x00000084, 0xe3f3e4f4},
1326         {0x00000085, 0x00052024},
1327         {0x00000087, 0x00000000},
1328         {0x00000088, 0x66036603},
1329         {0x00000089, 0x01000000},
1330         {0x0000008b, 0x1c0a0000},
1331         {0x0000008c, 0xff010000},
1332         {0x0000008e, 0xffffefff},
1333         {0x0000008f, 0xfff3efff},
1334         {0x00000090, 0xfff3efbf},
1335         {0x00000094, 0x00101101},
1336         {0x00000095, 0x00000fff},
1337         {0x00000096, 0x00116fff},
1338         {0x00000097, 0x60010000},
1339         {0x00000098, 0x10010000},
1340         {0x00000099, 0x00006000},
1341         {0x0000009a, 0x00001000},
1342         {0x0000009f, 0x00a47400}
1343 };
1344
1345 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1346         {0x0000006f, 0x03044000},
1347         {0x00000070, 0x0480c018},
1348         {0x00000071, 0x00000040},
1349         {0x00000072, 0x01000000},
1350         {0x00000074, 0x000000ff},
1351         {0x00000075, 0x00143400},
1352         {0x00000076, 0x08ec0800},
1353         {0x00000077, 0x040000cc},
1354         {0x00000079, 0x00000000},
1355         {0x0000007a, 0x21000409},
1356         {0x0000007c, 0x00000000},
1357         {0x0000007d, 0xe8000000},
1358         {0x0000007e, 0x044408a8},
1359         {0x0000007f, 0x00000003},
1360         {0x00000080, 0x00000000},
1361         {0x00000081, 0x01000000},
1362         {0x00000082, 0x02000000},
1363         {0x00000083, 0x00000000},
1364         {0x00000084, 0xe3f3e4f4},
1365         {0x00000085, 0x00052024},
1366         {0x00000087, 0x00000000},
1367         {0x00000088, 0x66036603},
1368         {0x00000089, 0x01000000},
1369         {0x0000008b, 0x1c0a0000},
1370         {0x0000008c, 0xff010000},
1371         {0x0000008e, 0xffffefff},
1372         {0x0000008f, 0xfff3efff},
1373         {0x00000090, 0xfff3efbf},
1374         {0x00000094, 0x00101101},
1375         {0x00000095, 0x00000fff},
1376         {0x00000096, 0x00116fff},
1377         {0x00000097, 0x60010000},
1378         {0x00000098, 0x10010000},
1379         {0x00000099, 0x00006000},
1380         {0x0000009a, 0x00001000},
1381         {0x0000009f, 0x00a37400}
1382 };
1383
1384 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1385         {0x0000006f, 0x03044000},
1386         {0x00000070, 0x0480c018},
1387         {0x00000071, 0x00000040},
1388         {0x00000072, 0x01000000},
1389         {0x00000074, 0x000000ff},
1390         {0x00000075, 0x00143400},
1391         {0x00000076, 0x08ec0800},
1392         {0x00000077, 0x040000cc},
1393         {0x00000079, 0x00000000},
1394         {0x0000007a, 0x21000409},
1395         {0x0000007c, 0x00000000},
1396         {0x0000007d, 0xe8000000},
1397         {0x0000007e, 0x044408a8},
1398         {0x0000007f, 0x00000003},
1399         {0x00000080, 0x00000000},
1400         {0x00000081, 0x01000000},
1401         {0x00000082, 0x02000000},
1402         {0x00000083, 0x00000000},
1403         {0x00000084, 0xe3f3e4f4},
1404         {0x00000085, 0x00052024},
1405         {0x00000087, 0x00000000},
1406         {0x00000088, 0x66036603},
1407         {0x00000089, 0x01000000},
1408         {0x0000008b, 0x1c0a0000},
1409         {0x0000008c, 0xff010000},
1410         {0x0000008e, 0xffffefff},
1411         {0x0000008f, 0xfff3efff},
1412         {0x00000090, 0xfff3efbf},
1413         {0x00000094, 0x00101101},
1414         {0x00000095, 0x00000fff},
1415         {0x00000096, 0x00116fff},
1416         {0x00000097, 0x60010000},
1417         {0x00000098, 0x10010000},
1418         {0x00000099, 0x00006000},
1419         {0x0000009a, 0x00001000},
1420         {0x0000009f, 0x00a17730}
1421 };
1422
1423 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1424         {0x0000006f, 0x03044000},
1425         {0x00000070, 0x0480c018},
1426         {0x00000071, 0x00000040},
1427         {0x00000072, 0x01000000},
1428         {0x00000074, 0x000000ff},
1429         {0x00000075, 0x00143400},
1430         {0x00000076, 0x08ec0800},
1431         {0x00000077, 0x040000cc},
1432         {0x00000079, 0x00000000},
1433         {0x0000007a, 0x21000409},
1434         {0x0000007c, 0x00000000},
1435         {0x0000007d, 0xe8000000},
1436         {0x0000007e, 0x044408a8},
1437         {0x0000007f, 0x00000003},
1438         {0x00000080, 0x00000000},
1439         {0x00000081, 0x01000000},
1440         {0x00000082, 0x02000000},
1441         {0x00000083, 0x00000000},
1442         {0x00000084, 0xe3f3e4f4},
1443         {0x00000085, 0x00052024},
1444         {0x00000087, 0x00000000},
1445         {0x00000088, 0x66036603},
1446         {0x00000089, 0x01000000},
1447         {0x0000008b, 0x1c0a0000},
1448         {0x0000008c, 0xff010000},
1449         {0x0000008e, 0xffffefff},
1450         {0x0000008f, 0xfff3efff},
1451         {0x00000090, 0xfff3efbf},
1452         {0x00000094, 0x00101101},
1453         {0x00000095, 0x00000fff},
1454         {0x00000096, 0x00116fff},
1455         {0x00000097, 0x60010000},
1456         {0x00000098, 0x10010000},
1457         {0x00000099, 0x00006000},
1458         {0x0000009a, 0x00001000},
1459         {0x0000009f, 0x00a07730}
1460 };
1461
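/*
 * Each *_io_mc_regs table above is a list of {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs that si_mc_load_microcode() below replays
 * before streaming in the MC ucode.  Illustrative sketch (not extra driver
 * code) of what one loop iteration does for pitcairn_io_mc_regs[0]:
 *
 *	WREG32(MC_SEQ_IO_DEBUG_INDEX, 0x0000006f);
 *	WREG32(MC_SEQ_IO_DEBUG_DATA,  0x03044000);
 */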
1462 /* ucode loading */
1463 static int si_mc_load_microcode(struct radeon_device *rdev)
1464 {
1465         const __be32 *fw_data;
1466         u32 running;
1467         u32 *io_mc_regs;
1468         int i, ucode_size, regs_size;
1469
1470         if (!rdev->mc_fw)
1471                 return -EINVAL;
1472
1473         switch (rdev->family) {
1474         case CHIP_TAHITI:
1475                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1476                 ucode_size = SI_MC_UCODE_SIZE;
1477                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1478                 break;
1479         case CHIP_PITCAIRN:
1480                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1481                 ucode_size = SI_MC_UCODE_SIZE;
1482                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1483                 break;
1484         case CHIP_VERDE:
1485         default:
1486                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1487                 ucode_size = SI_MC_UCODE_SIZE;
1488                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1489                 break;
1490         case CHIP_OLAND:
1491                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1492                 ucode_size = OLAND_MC_UCODE_SIZE;
1493                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1494                 break;
1495         case CHIP_HAINAN:
1496                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1497                 ucode_size = OLAND_MC_UCODE_SIZE;
1498                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1499                 break;
1500         }
1501
1502         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1503
1504         if (running == 0) {
1509
1510                 /* reset the engine and set to writable */
1511                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1512                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1513
1514                 /* load mc io regs */
1515                 for (i = 0; i < regs_size; i++) {
1516                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1517                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1518                 }
1519                 /* load the MC ucode */
1520                 fw_data = (const __be32 *)rdev->mc_fw->data;
1521                 for (i = 0; i < ucode_size; i++)
1522                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1523
1524                 /* put the engine back into the active state */
1525                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1526                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1527                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1528
1529                 /* wait for training to complete */
1530                 for (i = 0; i < rdev->usec_timeout; i++) {
1531                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1532                                 break;
1533                         udelay(1);
1534                 }
1535                 for (i = 0; i < rdev->usec_timeout; i++) {
1536                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537                                 break;
1538                         udelay(1);
1539                 }
1543         }
1544
1545         return 0;
1546 }
1547
1548 static int si_init_microcode(struct radeon_device *rdev)
1549 {
1550         const char *chip_name;
1551         const char *rlc_chip_name;
1552         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1553         size_t smc_req_size;
1554         char fw_name[30];
1555         int err;
1556
1557         DRM_DEBUG("\n");
1558
1559         switch (rdev->family) {
1560         case CHIP_TAHITI:
1561                 chip_name = "TAHITI";
1562                 rlc_chip_name = "TAHITI";
1563                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1564                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1565                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1566                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1567                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1568                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1569                 break;
1570         case CHIP_PITCAIRN:
1571                 chip_name = "PITCAIRN";
1572                 rlc_chip_name = "PITCAIRN";
1573                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1574                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1575                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1576                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1577                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1578                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1579                 break;
1580         case CHIP_VERDE:
1581                 chip_name = "VERDE";
1582                 rlc_chip_name = "VERDE";
1583                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1584                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1585                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1586                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1587                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1588                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1589                 break;
1590         case CHIP_OLAND:
1591                 chip_name = "OLAND";
1592                 rlc_chip_name = "OLAND";
1593                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1594                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1595                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1596                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1597                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1598                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1599                 break;
1600         case CHIP_HAINAN:
1601                 chip_name = "HAINAN";
1602                 rlc_chip_name = "HAINAN";
1603                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1604                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1605                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1606                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1607                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1608                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1609                 break;
1610         default: BUG();
1611         }
1612
1613         DRM_INFO("Loading %s Microcode\n", chip_name);
1614
1615         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1616         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1617         if (err)
1618                 goto out;
1619         if (rdev->pfp_fw->size != pfp_req_size) {
1620                 printk(KERN_ERR
1621                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1622                        rdev->pfp_fw->size, fw_name);
1623                 err = -EINVAL;
1624                 goto out;
1625         }
1626
1627         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1628         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1629         if (err)
1630                 goto out;
1631         if (rdev->me_fw->size != me_req_size) {
1632                 printk(KERN_ERR
1633                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1634                        rdev->me_fw->size, fw_name);
1635                 err = -EINVAL;
                     goto out;
1636         }
1637
1638         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1639         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1640         if (err)
1641                 goto out;
1642         if (rdev->ce_fw->size != ce_req_size) {
1643                 printk(KERN_ERR
1644                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1645                        rdev->ce_fw->size, fw_name);
1646                 err = -EINVAL;
                     goto out;
1647         }
1648
1649         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1650         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651         if (err)
1652                 goto out;
1653         if (rdev->rlc_fw->size != rlc_req_size) {
1654                 printk(KERN_ERR
1655                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1656                        rdev->rlc_fw->size, fw_name);
1657                 err = -EINVAL;
                     goto out;
1658         }
1659
1660         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1661         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662         if (err)
1663                 goto out;
1664         if (rdev->mc_fw->size != mc_req_size) {
1665                 printk(KERN_ERR
1666                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1667                        rdev->mc_fw->size, fw_name);
1668                 err = -EINVAL;
                     goto out;
1669         }
1670
1671         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1672         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1673         if (err) {
1674                 printk(KERN_ERR
1675                        "smc: error loading firmware \"%s\"\n",
1676                        fw_name);
1677                 release_firmware(rdev->smc_fw);
1678                 rdev->smc_fw = NULL;
                     err = 0; /* missing SMC firmware is not fatal; DPM just won't be available */
1679         } else if (rdev->smc_fw->size != smc_req_size) {
1680                 printk(KERN_ERR
1681                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1682                        rdev->smc_fw->size, fw_name);
1683                 err = -EINVAL;
1684         }
1685
1686 out:
1687         if (err) {
1688                 if (err != -EINVAL)
1689                         printk(KERN_ERR
1690                                "si_cp: Failed to load firmware \"%s\"\n",
1691                                fw_name);
1692                 release_firmware(rdev->pfp_fw);
1693                 rdev->pfp_fw = NULL;
1694                 release_firmware(rdev->me_fw);
1695                 rdev->me_fw = NULL;
1696                 release_firmware(rdev->ce_fw);
1697                 rdev->ce_fw = NULL;
1698                 release_firmware(rdev->rlc_fw);
1699                 rdev->rlc_fw = NULL;
1700                 release_firmware(rdev->mc_fw);
1701                 rdev->mc_fw = NULL;
1702                 release_firmware(rdev->smc_fw);
1703                 rdev->smc_fw = NULL;
1704         }
1705         return err;
1706 }
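/*
 * For reference, the firmware files requested above for a Pitcairn board
 * (chip_name == "PITCAIRN") resolve to:
 *
 *	radeon/PITCAIRN_pfp.bin, radeon/PITCAIRN_me.bin, radeon/PITCAIRN_ce.bin,
 *	radeon/PITCAIRN_rlc.bin, radeon/PITCAIRN_mc.bin, radeon/PITCAIRN_smc.bin
 *
 * Only the SMC image is optional; the rest must match the expected sizes.
 */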
1707
1708 /* watermark setup */
1709 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1710                                    struct radeon_crtc *radeon_crtc,
1711                                    struct drm_display_mode *mode,
1712                                    struct drm_display_mode *other_mode)
1713 {
1714         u32 tmp;
1715         /*
1716          * Line Buffer Setup
1717          * There are 3 line buffers, each one shared by 2 display controllers.
1718          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1719          * the display controllers.  The partitioning is done via one of four
1720          * preset allocations specified in bits 21:20 (only the two used here are listed):
1721          *  0 - half lb
1722          *  2 - whole lb, other crtc must be disabled
1723          */
1724         /* this can get tricky if we have two large displays on a paired group
1725          * of crtcs.  Ideally for multiple large displays we'd assign them to
1726          * non-linked crtcs for maximum line buffer allocation.
1727          */
1728         if (radeon_crtc->base.enabled && mode) {
1729                 if (other_mode)
1730                         tmp = 0; /* 1/2 */
1731                 else
1732                         tmp = 2; /* whole */
1733         } else
1734                 tmp = 0;
1735
1736         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1737                DC_LB_MEMORY_CONFIG(tmp));
1738
1739         if (radeon_crtc->base.enabled && mode) {
1740                 switch (tmp) {
1741                 case 0:
1742                 default:
1743                         return 4096 * 2;
1744                 case 2:
1745                         return 8192 * 2;
1746                 }
1747         }
1748
1749         /* controller not enabled, so no lb used */
1750         return 0;
1751 }
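/*
 * Worked example for dce6_line_buffer_adjust() (illustrative only): if both
 * crtcs of a pair are enabled, each gets tmp = 0 (half of the shared line
 * buffer) and the function returns 4096 * 2 = 8192; if the partner crtc is
 * disabled, tmp = 2 (whole buffer) and it returns 8192 * 2 = 16384.  The
 * returned value is the lb_size later handed to dce6_program_watermarks().
 */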
1752
1753 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1754 {
1755         u32 tmp = RREG32(MC_SHARED_CHMAP);
1756
1757         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1758         case 0:
1759         default:
1760                 return 1;
1761         case 1:
1762                 return 2;
1763         case 2:
1764                 return 4;
1765         case 3:
1766                 return 8;
1767         case 4:
1768                 return 3;
1769         case 5:
1770                 return 6;
1771         case 6:
1772                 return 10;
1773         case 7:
1774                 return 12;
1775         case 8:
1776                 return 16;
1777         }
1778 }
1779
1780 struct dce6_wm_params {
1781         u32 dram_channels; /* number of dram channels */
1782         u32 yclk;          /* bandwidth per dram data pin in kHz */
1783         u32 sclk;          /* engine clock in kHz */
1784         u32 disp_clk;      /* display clock in kHz */
1785         u32 src_width;     /* viewport width */
1786         u32 active_time;   /* active display time in ns */
1787         u32 blank_time;    /* blank time in ns */
1788         bool interlaced;    /* mode is interlaced */
1789         fixed20_12 vsc;    /* vertical scale ratio */
1790         u32 num_heads;     /* number of active crtcs */
1791         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1792         u32 lb_size;       /* line buffer allocated to pipe */
1793         u32 vtaps;         /* vertical scaler taps */
1794 };
1795
1796 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1797 {
1798         /* Calculate raw DRAM Bandwidth */
1799         fixed20_12 dram_efficiency; /* 0.7 */
1800         fixed20_12 yclk, dram_channels, bandwidth;
1801         fixed20_12 a;
1802
1803         a.full = dfixed_const(1000);
1804         yclk.full = dfixed_const(wm->yclk);
1805         yclk.full = dfixed_div(yclk, a);
1806         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1807         a.full = dfixed_const(10);
1808         dram_efficiency.full = dfixed_const(7);
1809         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1810         bandwidth.full = dfixed_mul(dram_channels, yclk);
1811         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1812
1813         return dfixed_trunc(bandwidth);
1814 }
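/*
 * Worked example for dce6_dram_bandwidth(), assuming yclk is in kHz and the
 * result is read as MB/s: with yclk = 800000 (800 MHz) and 2 DRAM channels,
 *
 *	bandwidth ~= (800000 / 1000) * (2 * 4) * 0.7 = 4480
 *
 * i.e. roughly 4.5 GB/s of raw DRAM bandwidth at the assumed 70% efficiency.
 */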
1815
1816 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1817 {
1818         /* Calculate DRAM Bandwidth and the part allocated to display. */
1819         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1820         fixed20_12 yclk, dram_channels, bandwidth;
1821         fixed20_12 a;
1822
1823         a.full = dfixed_const(1000);
1824         yclk.full = dfixed_const(wm->yclk);
1825         yclk.full = dfixed_div(yclk, a);
1826         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1827         a.full = dfixed_const(10);
1828         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
1829         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1830         bandwidth.full = dfixed_mul(dram_channels, yclk);
1831         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1832
1833         return dfixed_trunc(bandwidth);
1834 }
1835
1836 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1837 {
1838         /* Calculate the display Data return Bandwidth */
1839         fixed20_12 return_efficiency; /* 0.8 */
1840         fixed20_12 sclk, bandwidth;
1841         fixed20_12 a;
1842
1843         a.full = dfixed_const(1000);
1844         sclk.full = dfixed_const(wm->sclk);
1845         sclk.full = dfixed_div(sclk, a);
1846         a.full = dfixed_const(10);
1847         return_efficiency.full = dfixed_const(8);
1848         return_efficiency.full = dfixed_div(return_efficiency, a);
1849         a.full = dfixed_const(32);
1850         bandwidth.full = dfixed_mul(a, sclk);
1851         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1852
1853         return dfixed_trunc(bandwidth);
1854 }
1855
1856 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1857 {
1858         return 32;
1859 }
1860
1861 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1862 {
1863         /* Calculate the DMIF Request Bandwidth */
1864         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1865         fixed20_12 disp_clk, sclk, bandwidth;
1866         fixed20_12 a, b1, b2;
1867         u32 min_bandwidth;
1868
1869         a.full = dfixed_const(1000);
1870         disp_clk.full = dfixed_const(wm->disp_clk);
1871         disp_clk.full = dfixed_div(disp_clk, a);
1872         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1873         b1.full = dfixed_mul(a, disp_clk);
1874
1875         a.full = dfixed_const(1000);
1876         sclk.full = dfixed_const(wm->sclk);
1877         sclk.full = dfixed_div(sclk, a);
1878         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1879         b2.full = dfixed_mul(a, sclk);
1880
1881         a.full = dfixed_const(10);
1882         disp_clk_request_efficiency.full = dfixed_const(8);
1883         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1884
1885         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1886
1887         a.full = dfixed_const(min_bandwidth);
1888         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1889
1890         return dfixed_trunc(bandwidth);
1891 }
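/*
 * Worked example for dce6_dmif_request_bandwidth(), with illustrative clocks
 * of disp_clk = 150000 kHz and sclk = 800000 kHz (32 bytes per DMIF request):
 *
 *	b1 = (32 / 2) * 150 =  2400
 *	b2 =  32      * 800 = 25600
 *	bandwidth ~= min(2400, 25600) * 0.8 = 1920
 *
 * so the display clock side is the limiting factor in this example.
 */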
1892
1893 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1894 {
1895         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
1896         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1897         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1898         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1899
1900         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1901 }
1902
1903 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1904 {
1905         /* Calculate the display mode Average Bandwidth
1906          * DisplayMode should contain the source and destination dimensions,
1907          * timing, etc.
1908          */
1909         fixed20_12 bpp;
1910         fixed20_12 line_time;
1911         fixed20_12 src_width;
1912         fixed20_12 bandwidth;
1913         fixed20_12 a;
1914
1915         a.full = dfixed_const(1000);
1916         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1917         line_time.full = dfixed_div(line_time, a);
1918         bpp.full = dfixed_const(wm->bytes_per_pixel);
1919         src_width.full = dfixed_const(wm->src_width);
1920         bandwidth.full = dfixed_mul(src_width, bpp);
1921         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1922         bandwidth.full = dfixed_div(bandwidth, line_time);
1923
1924         return dfixed_trunc(bandwidth);
1925 }
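/*
 * Worked example for dce6_average_bandwidth(), using an illustrative
 * 1920-wide source at 4 bytes per pixel, vsc = 1 and a total line time
 * (active + blank) of 13200 ns:
 *
 *	bandwidth ~= (1920 * 4 * 1) / (13200 / 1000) ~= 581
 *
 * i.e. the average number of bytes this head must fetch per microsecond.
 */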
1926
1927 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1928 {
1929         /* First calculate the latency in ns */
1930         u32 mc_latency = 2000; /* 2000 ns. */
1931         u32 available_bandwidth = dce6_available_bandwidth(wm);
1932         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1933         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1934         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1935         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1936                 (wm->num_heads * cursor_line_pair_return_time);
1937         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1938         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1939         u32 tmp, dmif_size = 12288;
1940         fixed20_12 a, b, c;
1941
1942         if (wm->num_heads == 0)
1943                 return 0;
1944
1945         a.full = dfixed_const(2);
1946         b.full = dfixed_const(1);
1947         if ((wm->vsc.full > a.full) ||
1948             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1949             (wm->vtaps >= 5) ||
1950             ((wm->vsc.full >= a.full) && wm->interlaced))
1951                 max_src_lines_per_dst_line = 4;
1952         else
1953                 max_src_lines_per_dst_line = 2;
1954
1955         a.full = dfixed_const(available_bandwidth);
1956         b.full = dfixed_const(wm->num_heads);
1957         a.full = dfixed_div(a, b);
1958
1959         b.full = dfixed_const(mc_latency + 512);
1960         c.full = dfixed_const(wm->disp_clk);
1961         b.full = dfixed_div(b, c);
1962
1963         c.full = dfixed_const(dmif_size);
1964         b.full = dfixed_div(c, b);
1965
1966         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1967
1968         b.full = dfixed_const(1000);
1969         c.full = dfixed_const(wm->disp_clk);
1970         b.full = dfixed_div(c, b);
1971         c.full = dfixed_const(wm->bytes_per_pixel);
1972         b.full = dfixed_mul(b, c);
1973
1974         lb_fill_bw = min(tmp, dfixed_trunc(b));
1975
1976         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1977         b.full = dfixed_const(1000);
1978         c.full = dfixed_const(lb_fill_bw);
1979         b.full = dfixed_div(c, b);
1980         a.full = dfixed_div(a, b);
1981         line_fill_time = dfixed_trunc(a);
1982
1983         if (line_fill_time < wm->active_time)
1984                 return latency;
1985         else
1986                 return latency + (line_fill_time - wm->active_time);
1987
1988 }
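/*
 * Worked example for dce6_latency_watermark(), with illustrative values of
 * available_bandwidth = 4480, disp_clk = 148500 kHz and num_heads = 1:
 *
 *	worst_chunk_return_time      = 512 * 8 * 1000 / 4480 ~=  914 ns
 *	cursor_line_pair_return_time = 128 * 4 * 1000 / 4480 ~=  114 ns
 *	dc_latency                   = 40000000 / 148500     ~=  269 ns
 *	other_heads_data_return_time = 2 * 914 + 1 * 114      = 1942 ns
 *	latency                      = 2000 + 1942 + 269      = 4211 ns
 *
 * The returned watermark is this latency, padded further if the line buffer
 * cannot be refilled within one active line.
 */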
1989
1990 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1991 {
1992         if (dce6_average_bandwidth(wm) <=
1993             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1994                 return true;
1995         else
1996                 return false;
1997 }
1998
1999 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2000 {
2001         if (dce6_average_bandwidth(wm) <=
2002             (dce6_available_bandwidth(wm) / wm->num_heads))
2003                 return true;
2004         else
2005                 return false;
2006 }
2007
2008 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2009 {
2010         u32 lb_partitions = wm->lb_size / wm->src_width;
2011         u32 line_time = wm->active_time + wm->blank_time;
2012         u32 latency_tolerant_lines;
2013         u32 latency_hiding;
2014         fixed20_12 a;
2015
2016         a.full = dfixed_const(1);
2017         if (wm->vsc.full > a.full)
2018                 latency_tolerant_lines = 1;
2019         else {
2020                 if (lb_partitions <= (wm->vtaps + 1))
2021                         latency_tolerant_lines = 1;
2022                 else
2023                         latency_tolerant_lines = 2;
2024         }
2025
2026         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2027
2028         if (dce6_latency_watermark(wm) <= latency_hiding)
2029                 return true;
2030         else
2031                 return false;
2032 }
2033
2034 static void dce6_program_watermarks(struct radeon_device *rdev,
2035                                          struct radeon_crtc *radeon_crtc,
2036                                          u32 lb_size, u32 num_heads)
2037 {
2038         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2039         struct dce6_wm_params wm_low, wm_high;
2040         u32 dram_channels;
2041         u32 pixel_period;
2042         u32 line_time = 0;
2043         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2044         u32 priority_a_mark = 0, priority_b_mark = 0;
2045         u32 priority_a_cnt = PRIORITY_OFF;
2046         u32 priority_b_cnt = PRIORITY_OFF;
2047         u32 tmp, arb_control3;
2048         fixed20_12 a, b, c;
2049
2050         if (radeon_crtc->base.enabled && num_heads && mode) {
2051                 pixel_period = 1000000 / (u32)mode->clock;
2052                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2053                 priority_a_cnt = 0;
2054                 priority_b_cnt = 0;
2055
2056                 if (rdev->family == CHIP_ARUBA)
2057                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2058                 else
2059                         dram_channels = si_get_number_of_dram_channels(rdev);
2060
2061                 /* watermark for high clocks */
2062                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2063                         wm_high.yclk =
2064                                 radeon_dpm_get_mclk(rdev, false) * 10;
2065                         wm_high.sclk =
2066                                 radeon_dpm_get_sclk(rdev, false) * 10;
2067                 } else {
2068                         wm_high.yclk = rdev->pm.current_mclk * 10;
2069                         wm_high.sclk = rdev->pm.current_sclk * 10;
2070                 }
2071
2072                 wm_high.disp_clk = mode->clock;
2073                 wm_high.src_width = mode->crtc_hdisplay;
2074                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2075                 wm_high.blank_time = line_time - wm_high.active_time;
2076                 wm_high.interlaced = false;
2077                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2078                         wm_high.interlaced = true;
2079                 wm_high.vsc = radeon_crtc->vsc;
2080                 wm_high.vtaps = 1;
2081                 if (radeon_crtc->rmx_type != RMX_OFF)
2082                         wm_high.vtaps = 2;
2083                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2084                 wm_high.lb_size = lb_size;
2085                 wm_high.dram_channels = dram_channels;
2086                 wm_high.num_heads = num_heads;
2087
2088                 /* watermark for low clocks */
2089                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2090                         wm_low.yclk =
2091                                 radeon_dpm_get_mclk(rdev, true) * 10;
2092                         wm_low.sclk =
2093                                 radeon_dpm_get_sclk(rdev, true) * 10;
2094                 } else {
2095                         wm_low.yclk = rdev->pm.current_mclk * 10;
2096                         wm_low.sclk = rdev->pm.current_sclk * 10;
2097                 }
2098
2099                 wm_low.disp_clk = mode->clock;
2100                 wm_low.src_width = mode->crtc_hdisplay;
2101                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2102                 wm_low.blank_time = line_time - wm_low.active_time;
2103                 wm_low.interlaced = false;
2104                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2105                         wm_low.interlaced = true;
2106                 wm_low.vsc = radeon_crtc->vsc;
2107                 wm_low.vtaps = 1;
2108                 if (radeon_crtc->rmx_type != RMX_OFF)
2109                         wm_low.vtaps = 2;
2110                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2111                 wm_low.lb_size = lb_size;
2112                 wm_low.dram_channels = dram_channels;
2113                 wm_low.num_heads = num_heads;
2114
2115                 /* set for high clocks */
2116                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2117                 /* set for low clocks */
2118                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2119
2120                 /* possibly force display priority to high */
2121                 /* should really do this at mode validation time... */
2122                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2123                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2124                     !dce6_check_latency_hiding(&wm_high) ||
2125                     (rdev->disp_priority == 2)) {
2126                         DRM_DEBUG_KMS("force priority to high\n");
2127                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2128                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2129                 }
2130                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2131                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2132                     !dce6_check_latency_hiding(&wm_low) ||
2133                     (rdev->disp_priority == 2)) {
2134                         DRM_DEBUG_KMS("force priority to high\n");
2135                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2136                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2137                 }
2138
2139                 a.full = dfixed_const(1000);
2140                 b.full = dfixed_const(mode->clock);
2141                 b.full = dfixed_div(b, a);
2142                 c.full = dfixed_const(latency_watermark_a);
2143                 c.full = dfixed_mul(c, b);
2144                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2145                 c.full = dfixed_div(c, a);
2146                 a.full = dfixed_const(16);
2147                 c.full = dfixed_div(c, a);
2148                 priority_a_mark = dfixed_trunc(c);
2149                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2150
2151                 a.full = dfixed_const(1000);
2152                 b.full = dfixed_const(mode->clock);
2153                 b.full = dfixed_div(b, a);
2154                 c.full = dfixed_const(latency_watermark_b);
2155                 c.full = dfixed_mul(c, b);
2156                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2157                 c.full = dfixed_div(c, a);
2158                 a.full = dfixed_const(16);
2159                 c.full = dfixed_div(c, a);
2160                 priority_b_mark = dfixed_trunc(c);
2161                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2162         }
2163
2164         /* select wm A */
2165         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2166         tmp = arb_control3;
2167         tmp &= ~LATENCY_WATERMARK_MASK(3);
2168         tmp |= LATENCY_WATERMARK_MASK(1);
2169         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2170         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2171                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2172                 LATENCY_HIGH_WATERMARK(line_time)));
2173         /* select wm B */
2174         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2175         tmp &= ~LATENCY_WATERMARK_MASK(3);
2176         tmp |= LATENCY_WATERMARK_MASK(2);
2177         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2178         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2179                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2180                 LATENCY_HIGH_WATERMARK(line_time)));
2181         /* restore original selection */
2182         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2183
2184         /* write the priority marks */
2185         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2186         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2187
2188         /* save values for DPM */
2189         radeon_crtc->line_time = line_time;
2190         radeon_crtc->wm_high = latency_watermark_a;
2191         radeon_crtc->wm_low = latency_watermark_b;
2192 }
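/*
 * Worked example for the priority mark computation above, assuming
 * latency_watermark_a = 4211 ns, mode->clock = 148500 kHz and hsc = 1:
 *
 *	priority_a_mark ~= 4211 * (148500 / 1000) / 1000 / 16 ~= 39
 *
 * i.e. the latency expressed in 16-pixel units, which is what ends up in
 * PRIORITY_A_CNT.
 */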
2193
2194 void dce6_bandwidth_update(struct radeon_device *rdev)
2195 {
2196         struct drm_display_mode *mode0 = NULL;
2197         struct drm_display_mode *mode1 = NULL;
2198         u32 num_heads = 0, lb_size;
2199         int i;
2200
2201         radeon_update_display_priority(rdev);
2202
2203         for (i = 0; i < rdev->num_crtc; i++) {
2204                 if (rdev->mode_info.crtcs[i]->base.enabled)
2205                         num_heads++;
2206         }
2207         for (i = 0; i < rdev->num_crtc; i += 2) {
2208                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2209                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2210                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2211                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2212                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2213                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2214         }
2215 }
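/*
 * Note that the loop above visits the crtcs in pairs (i, i + 1) because each
 * line buffer is shared by two display controllers; the partner's mode is
 * passed as other_mode so dce6_line_buffer_adjust() can size each half.  On
 * a 6-crtc part the pairs are (0,1), (2,3) and (4,5).
 */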
2216
2217 /*
2218  * Core functions
2219  */
2220 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2221 {
2222         const u32 num_tile_mode_states = 32;
2223         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2224
2225         switch (rdev->config.si.mem_row_size_in_kb) {
2226         case 1:
2227                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2228                 break;
2229         case 2:
2230         default:
2231                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2232                 break;
2233         case 4:
2234                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2235                 break;
2236         }
2237
2238         if ((rdev->family == CHIP_TAHITI) ||
2239             (rdev->family == CHIP_PITCAIRN)) {
2240                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2241                         switch (reg_offset) {
2242                         case 0:  /* non-AA compressed depth or any compressed stencil */
2243                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2245                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2246                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2247                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2248                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2251                                 break;
2252                         case 1:  /* 2xAA/4xAA compressed depth only */
2253                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2255                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2256                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2257                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2258                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2261                                 break;
2262                         case 2:  /* 8xAA compressed depth only */
2263                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2266                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2267                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2268                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2271                                 break;
2272                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2273                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2275                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2276                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2277                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2278                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2281                                 break;
2282                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2284                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2285                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2286                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2287                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2288                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2289                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2290                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2291                                 break;
2292                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2293                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2295                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2296                                                  TILE_SPLIT(split_equal_to_row_size) |
2297                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2298                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2300                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2301                                 break;
2302                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2303                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2304                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2305                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2306                                                  TILE_SPLIT(split_equal_to_row_size) |
2307                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2308                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2311                                 break;
2312                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2313                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2315                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2316                                                  TILE_SPLIT(split_equal_to_row_size) |
2317                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2318                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2320                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2321                                 break;
2322                         case 8:  /* 1D and 1D Array Surfaces */
2323                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2324                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2325                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2326                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2327                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2328                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2329                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2330                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2331                                 break;
2332                         case 9:  /* Displayable maps. */
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2337                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2338                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2340                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2341                                 break;
2342                         case 10:  /* Display 8bpp. */
2343                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2345                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2346                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2347                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2348                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2349                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2350                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2351                                 break;
2352                         case 11:  /* Display 16bpp. */
2353                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2356                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2357                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2358                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2361                                 break;
2362                         case 12:  /* Display 32bpp. */
2363                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2366                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2367                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2368                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2371                                 break;
2372                         case 13:  /* Thin. */
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2375                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2376                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2377                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2378                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2381                                 break;
2382                         case 14:  /* Thin 8 bpp. */
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2386                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2387                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2388                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2390                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2391                                 break;
2392                         case 15:  /* Thin 16 bpp. */
2393                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2395                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2396                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2397                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2398                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2400                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2401                                 break;
2402                         case 16:  /* Thin 32 bpp. */
2403                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2405                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2406                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2407                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2408                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2411                                 break;
2412                         case 17:  /* Thin 64 bpp. */
2413                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2414                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2415                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2416                                                  TILE_SPLIT(split_equal_to_row_size) |
2417                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2418                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2420                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2421                                 break;
2422                         case 21:  /* 8 bpp PRT. */
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2426                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2428                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2429                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2431                                 break;
2432                         case 22:  /* 16 bpp PRT */
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2436                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2437                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2438                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2441                                 break;
2442                         case 23:  /* 32 bpp PRT */
2443                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2445                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2446                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2448                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2450                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2451                                 break;
2452                         case 24:  /* 64 bpp PRT */
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2457                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2458                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461                                 break;
2462                         case 25:  /* 128 bpp PRT */
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2466                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2467                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2468                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2471                                 break;
2472                         default:
2473                                 gb_tile_moden = 0;
2474                                 break;
2475                         }
2476                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2477                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2478                 }
2479         } else if ((rdev->family == CHIP_VERDE) ||
2480                    (rdev->family == CHIP_OLAND) ||
2481                    (rdev->family == CHIP_HAINAN)) {
2482                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2483                         switch (reg_offset) {
2484                         case 0:  /* non-AA compressed depth or any compressed stencil */
2485                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2487                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2488                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2489                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2490                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2492                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2493                                 break;
2494                         case 1:  /* 2xAA/4xAA compressed depth only */
2495                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2497                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2498                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2499                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2500                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2502                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2503                                 break;
2504                         case 2:  /* 8xAA compressed depth only */
2505                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2508                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2509                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2510                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2513                                 break;
2514                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2515                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2518                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2520                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2523                                 break;
2524                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2525                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2526                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2528                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2529                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2530                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533                                 break;
2534                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2535                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2537                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2538                                                  TILE_SPLIT(split_equal_to_row_size) |
2539                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2540                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2542                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2543                                 break;
2544                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2545                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2548                                                  TILE_SPLIT(split_equal_to_row_size) |
2549                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2550                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553                                 break;
2554                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2555                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2558                                                  TILE_SPLIT(split_equal_to_row_size) |
2559                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2560                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2562                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2563                                 break;
2564                         case 8:  /* 1D and 1D Array Surfaces */
2565                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2566                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2567                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2569                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2570                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2573                                 break;
2574                         case 9:  /* Displayable maps. */
2575                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2577                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2578                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2579                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2580                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2582                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2583                                 break;
2584                         case 10:  /* Display 8bpp. */
2585                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2586                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2589                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2590                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2593                                 break;
2594                         case 11:  /* Display 16bpp. */
2595                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2598                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2600                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603                                 break;
2604                         case 12:  /* Display 32bpp. */
2605                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2609                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2610                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2612                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613                                 break;
2614                         case 13:  /* Thin. */
2615                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2617                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2618                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2619                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2620                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2623                                 break;
2624                         case 14:  /* Thin 8 bpp. */
2625                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2627                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2628                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2629                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2630                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633                                 break;
2634                         case 15:  /* Thin 16 bpp. */
2635                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2638                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2639                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2640                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2642                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2643                                 break;
2644                         case 16:  /* Thin 32 bpp. */
2645                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2649                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2650                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653                                 break;
2654                         case 17:  /* Thin 64 bpp. */
2655                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658                                                  TILE_SPLIT(split_equal_to_row_size) |
2659                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2660                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2663                                 break;
2664                         case 21:  /* 8 bpp PRT. */
2665                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2668                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2669                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2670                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2671                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2672                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2673                                 break;
2674                         case 22:  /* 16 bpp PRT */
2675                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2677                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2678                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2679                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2680                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2683                                 break;
2684                         case 23:  /* 32 bpp PRT */
2685                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2689                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2690                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693                                 break;
2694                         case 24:  /* 64 bpp PRT */
2695                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2700                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2702                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703                                 break;
2704                         case 25:  /* 128 bpp PRT */
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2709                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2710                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2713                                 break;
2714                         default:
2715                                 gb_tile_moden = 0;
2716                                 break;
2717                         }
2718                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2719                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2720                 }
2721         } else
2722                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2723 }
2724
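/*
 * si_select_se_sh - select which shader engine/array the following
 * indexed register accesses apply to.
 *
 * Programs GRBM_GFX_INDEX; passing 0xffffffff for se_num and/or
 * sh_num selects broadcast writes to all shader engines/arrays.
 */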
2725 static void si_select_se_sh(struct radeon_device *rdev,
2726                             u32 se_num, u32 sh_num)
2727 {
2728         u32 data = INSTANCE_BROADCAST_WRITES;
2729
2730         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2731                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2732         else if (se_num == 0xffffffff)
2733                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2734         else if (sh_num == 0xffffffff)
2735                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2736         else
2737                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2738         WREG32(GRBM_GFX_INDEX, data);
2739 }
2740
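/*
 * si_create_bitmask - build a mask with the bit_width low bits set,
 * e.g. bit_width = 4 yields 0xf (equivalent to (1 << bit_width) - 1
 * for bit_width < 32).
 */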
2741 static u32 si_create_bitmask(u32 bit_width)
2742 {
2743         u32 i, mask = 0;
2744
2745         for (i = 0; i < bit_width; i++) {
2746                 mask <<= 1;
2747                 mask |= 1;
2748         }
2749         return mask;
2750 }
2751
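/*
 * si_get_cu_enabled - return a bitmask of the active CUs in the
 * currently selected shader array.
 *
 * Merges the inactive-CU fields of CC_GC_SHADER_ARRAY_CONFIG and
 * GC_USER_SHADER_ARRAY_CONFIG, inverts them and limits the result
 * to cu_per_sh bits.
 */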
2752 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2753 {
2754         u32 data, mask;
2755
2756         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2757         if (data & 1)
2758                 data &= INACTIVE_CUS_MASK;
2759         else
2760                 data = 0;
2761         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2762
2763         data >>= INACTIVE_CUS_SHIFT;
2764
2765         mask = si_create_bitmask(cu_per_sh);
2766
2767         return ~data & mask;
2768 }
2769
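/*
 * si_setup_spi - static SPI thread management setup.
 *
 * For each shader engine/array, look up the active CU mask and clear
 * one active CU's enable bit in SPI_STATIC_THREAD_MGMT_3, then
 * restore broadcast mode.
 */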
2770 static void si_setup_spi(struct radeon_device *rdev,
2771                          u32 se_num, u32 sh_per_se,
2772                          u32 cu_per_sh)
2773 {
2774         int i, j, k;
2775         u32 data, mask, active_cu;
2776
2777         for (i = 0; i < se_num; i++) {
2778                 for (j = 0; j < sh_per_se; j++) {
2779                         si_select_se_sh(rdev, i, j);
2780                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2781                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2782
2783                         mask = 1;
2784                         for (k = 0; k < 16; k++) {
2785                                 mask <<= k;
2786                                 if (active_cu & mask) {
2787                                         data &= ~mask;
2788                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2789                                         break;
2790                                 }
2791                         }
2792                 }
2793         }
2794         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2795 }
2796
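/*
 * si_get_rb_disabled - return a bitmask of the disabled render
 * backends in the currently selected shader array.
 *
 * Merges CC_RB_BACKEND_DISABLE and GC_USER_RB_BACKEND_DISABLE and
 * masks the result to the number of RBs per shader array
 * (max_rb_num / se_num / sh_per_se).
 */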
2797 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2798                               u32 max_rb_num, u32 se_num,
2799                               u32 sh_per_se)
2800 {
2801         u32 data, mask;
2802
2803         data = RREG32(CC_RB_BACKEND_DISABLE);
2804         if (data & 1)
2805                 data &= BACKEND_DISABLE_MASK;
2806         else
2807                 data = 0;
2808         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2809
2810         data >>= BACKEND_DISABLE_SHIFT;
2811
2812         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2813
2814         return data & mask;
2815 }
2816
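/*
 * si_setup_rb - program the raster configuration.
 *
 * Collects the per-SE/SH disabled-RB bits into a single bitmap,
 * derives the enabled-RB bitmap from it, and writes
 * PA_SC_RASTER_CONFIG for each shader engine, choosing an RB_MAP
 * value for every two-bit group of enabled backends.
 */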
2817 static void si_setup_rb(struct radeon_device *rdev,
2818                         u32 se_num, u32 sh_per_se,
2819                         u32 max_rb_num)
2820 {
2821         int i, j;
2822         u32 data, mask;
2823         u32 disabled_rbs = 0;
2824         u32 enabled_rbs = 0;
2825
2826         for (i = 0; i < se_num; i++) {
2827                 for (j = 0; j < sh_per_se; j++) {
2828                         si_select_se_sh(rdev, i, j);
2829                         data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2830                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2831                 }
2832         }
2833         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2834
2835         mask = 1;
2836         for (i = 0; i < max_rb_num; i++) {
2837                 if (!(disabled_rbs & mask))
2838                         enabled_rbs |= mask;
2839                 mask <<= 1;
2840         }
2841
2842         for (i = 0; i < se_num; i++) {
2843                 si_select_se_sh(rdev, i, 0xffffffff);
2844                 data = 0;
2845                 for (j = 0; j < sh_per_se; j++) {
2846                         switch (enabled_rbs & 3) {
2847                         case 1:
2848                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2849                                 break;
2850                         case 2:
2851                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2852                                 break;
2853                         case 3:
2854                         default:
2855                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2856                                 break;
2857                         }
2858                         enabled_rbs >>= 2;
2859                 }
2860                 WREG32(PA_SC_RASTER_CONFIG, data);
2861         }
2862         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2863 }
2864
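/*
 * si_gpu_init - core GFX engine initialization.
 *
 * Fills in the per-ASIC limits and golden GB_ADDR_CONFIG value,
 * initializes HDP, derives the tiling configuration from the memory
 * controller settings, programs the tile mode table, render backends
 * and SPI, and finally applies the hardware defaults for the 3D engine.
 */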
2865 static void si_gpu_init(struct radeon_device *rdev)
2866 {
2867         u32 gb_addr_config = 0;
2868         u32 mc_shared_chmap, mc_arb_ramcfg;
2869         u32 sx_debug_1;
2870         u32 hdp_host_path_cntl;
2871         u32 tmp;
2872         int i, j;
2873
2874         switch (rdev->family) {
2875         case CHIP_TAHITI:
2876                 rdev->config.si.max_shader_engines = 2;
2877                 rdev->config.si.max_tile_pipes = 12;
2878                 rdev->config.si.max_cu_per_sh = 8;
2879                 rdev->config.si.max_sh_per_se = 2;
2880                 rdev->config.si.max_backends_per_se = 4;
2881                 rdev->config.si.max_texture_channel_caches = 12;
2882                 rdev->config.si.max_gprs = 256;
2883                 rdev->config.si.max_gs_threads = 32;
2884                 rdev->config.si.max_hw_contexts = 8;
2885
2886                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2887                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2888                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2889                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2890                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2891                 break;
2892         case CHIP_PITCAIRN:
2893                 rdev->config.si.max_shader_engines = 2;
2894                 rdev->config.si.max_tile_pipes = 8;
2895                 rdev->config.si.max_cu_per_sh = 5;
2896                 rdev->config.si.max_sh_per_se = 2;
2897                 rdev->config.si.max_backends_per_se = 4;
2898                 rdev->config.si.max_texture_channel_caches = 8;
2899                 rdev->config.si.max_gprs = 256;
2900                 rdev->config.si.max_gs_threads = 32;
2901                 rdev->config.si.max_hw_contexts = 8;
2902
2903                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2904                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2905                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2906                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2907                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2908                 break;
2909         case CHIP_VERDE:
2910         default:
2911                 rdev->config.si.max_shader_engines = 1;
2912                 rdev->config.si.max_tile_pipes = 4;
2913                 rdev->config.si.max_cu_per_sh = 5;
2914                 rdev->config.si.max_sh_per_se = 2;
2915                 rdev->config.si.max_backends_per_se = 4;
2916                 rdev->config.si.max_texture_channel_caches = 4;
2917                 rdev->config.si.max_gprs = 256;
2918                 rdev->config.si.max_gs_threads = 32;
2919                 rdev->config.si.max_hw_contexts = 8;
2920
2921                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2922                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2923                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2924                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2925                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2926                 break;
2927         case CHIP_OLAND:
2928                 rdev->config.si.max_shader_engines = 1;
2929                 rdev->config.si.max_tile_pipes = 4;
2930                 rdev->config.si.max_cu_per_sh = 6;
2931                 rdev->config.si.max_sh_per_se = 1;
2932                 rdev->config.si.max_backends_per_se = 2;
2933                 rdev->config.si.max_texture_channel_caches = 4;
2934                 rdev->config.si.max_gprs = 256;
2935                 rdev->config.si.max_gs_threads = 16;
2936                 rdev->config.si.max_hw_contexts = 8;
2937
2938                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2939                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2940                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2941                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2942                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2943                 break;
2944         case CHIP_HAINAN:
2945                 rdev->config.si.max_shader_engines = 1;
2946                 rdev->config.si.max_tile_pipes = 4;
2947                 rdev->config.si.max_cu_per_sh = 5;
2948                 rdev->config.si.max_sh_per_se = 1;
2949                 rdev->config.si.max_backends_per_se = 1;
2950                 rdev->config.si.max_texture_channel_caches = 2;
2951                 rdev->config.si.max_gprs = 256;
2952                 rdev->config.si.max_gs_threads = 16;
2953                 rdev->config.si.max_hw_contexts = 8;
2954
2955                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2956                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2957                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2958                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2959                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2960                 break;
2961         }
2962
2963         /* Initialize HDP */
2964         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2965                 WREG32((0x2c14 + j), 0x00000000);
2966                 WREG32((0x2c18 + j), 0x00000000);
2967                 WREG32((0x2c1c + j), 0x00000000);
2968                 WREG32((0x2c20 + j), 0x00000000);
2969                 WREG32((0x2c24 + j), 0x00000000);
2970         }
2971
2972         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2973
2974         evergreen_fix_pci_max_read_req_size(rdev);
2975
2976         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2977
2978         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2979         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2980
2981         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2982         rdev->config.si.mem_max_burst_length_bytes = 256;
2983         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2984         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2985         if (rdev->config.si.mem_row_size_in_kb > 4)
2986                 rdev->config.si.mem_row_size_in_kb = 4;
2987         /* XXX use MC settings? */
2988         rdev->config.si.shader_engine_tile_size = 32;
2989         rdev->config.si.num_gpus = 1;
2990         rdev->config.si.multi_gpu_tile_size = 64;
2991
2992         /* fix up row size */
2993         gb_addr_config &= ~ROW_SIZE_MASK;
2994         switch (rdev->config.si.mem_row_size_in_kb) {
2995         case 1:
2996         default:
2997                 gb_addr_config |= ROW_SIZE(0);
2998                 break;
2999         case 2:
3000                 gb_addr_config |= ROW_SIZE(1);
3001                 break;
3002         case 4:
3003                 gb_addr_config |= ROW_SIZE(2);
3004                 break;
3005         }
3006
3007         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3008          * not have bank info, so create a custom tiling dword.
3009          * bits 3:0   num_pipes
3010          * bits 7:4   num_banks
3011          * bits 11:8  group_size
3012          * bits 15:12 row_size
3013          */
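        /*
         * Worked example (actual values depend on the board): with
         * 8 pipes (3), sixteen banks (2), pipe interleave field 0 and
         * row size field 2, tile_config below ends up as 0x2023.
         */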
3014         rdev->config.si.tile_config = 0;
3015         switch (rdev->config.si.num_tile_pipes) {
3016         case 1:
3017                 rdev->config.si.tile_config |= (0 << 0);
3018                 break;
3019         case 2:
3020                 rdev->config.si.tile_config |= (1 << 0);
3021                 break;
3022         case 4:
3023                 rdev->config.si.tile_config |= (2 << 0);
3024                 break;
3025         case 8:
3026         default:
3027                 /* XXX what about 12? */
3028                 rdev->config.si.tile_config |= (3 << 0);
3029                 break;
3030         }
3031         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3032         case 0: /* four banks */
3033                 rdev->config.si.tile_config |= 0 << 4;
3034                 break;
3035         case 1: /* eight banks */
3036                 rdev->config.si.tile_config |= 1 << 4;
3037                 break;
3038         case 2: /* sixteen banks */
3039         default:
3040                 rdev->config.si.tile_config |= 2 << 4;
3041                 break;
3042         }
3043         rdev->config.si.tile_config |=
3044                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3045         rdev->config.si.tile_config |=
3046                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3047
3048         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3049         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3050         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3051         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3052         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3053         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3054         if (rdev->has_uvd) {
3055                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3056                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3057                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3058         }
3059
3060         si_tiling_mode_table_init(rdev);
3061
3062         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3063                     rdev->config.si.max_sh_per_se,
3064                     rdev->config.si.max_backends_per_se);
3065
3066         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3067                      rdev->config.si.max_sh_per_se,
3068                      rdev->config.si.max_cu_per_sh);
3069
3070
3071         /* set HW defaults for 3D engine */
3072         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3073                                      ROQ_IB2_START(0x2b)));
3074         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3075
3076         sx_debug_1 = RREG32(SX_DEBUG_1);
3077         WREG32(SX_DEBUG_1, sx_debug_1);
3078
3079         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3080
3081         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3082                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3083                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3084                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3085
3086         WREG32(VGT_NUM_INSTANCES, 1);
3087
3088         WREG32(CP_PERFMON_CNTL, 0);
3089
3090         WREG32(SQ_CONFIG, 0);
3091
3092         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3093                                           FORCE_EOV_MAX_REZ_CNT(255)));
3094
3095         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3096                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3097
3098         WREG32(VGT_GS_VERTEX_REUSE, 16);
3099         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3100
3101         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3102         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3103         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3104         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3105         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3106         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3107         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3108         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3109
3110         tmp = RREG32(HDP_MISC_CNTL);
3111         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3112         WREG32(HDP_MISC_CNTL, tmp);
3113
3114         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3115         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3116
3117         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3118
3119         udelay(50);
3120 }
3121
3122 /*
3123  * GPU scratch register helper function.
3124  */
3125 static void si_scratch_init(struct radeon_device *rdev)
3126 {
3127         int i;
3128
3129         rdev->scratch.num_reg = 7;
3130         rdev->scratch.reg_base = SCRATCH_REG0;
3131         for (i = 0; i < rdev->scratch.num_reg; i++) {
3132                 rdev->scratch.free[i] = true;
3133                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3134         }
3135 }
3136
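/*
 * si_fence_ring_emit - emit a fence on the gfx ring.
 *
 * Flushes the read caches over the GART with a SURFACE_SYNC packet,
 * then uses EVENT_WRITE_EOP to write the fence sequence number to the
 * fence address and raise an interrupt.
 */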
3137 void si_fence_ring_emit(struct radeon_device *rdev,
3138                         struct radeon_fence *fence)
3139 {
3140         struct radeon_ring *ring = &rdev->ring[fence->ring];
3141         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3142
3143         /* flush read cache over gart */
3144         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3145         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3146         radeon_ring_write(ring, 0);
3147         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3148         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3149                           PACKET3_TC_ACTION_ENA |
3150                           PACKET3_SH_KCACHE_ACTION_ENA |
3151                           PACKET3_SH_ICACHE_ACTION_ENA);
3152         radeon_ring_write(ring, 0xFFFFFFFF);
3153         radeon_ring_write(ring, 0);
3154         radeon_ring_write(ring, 10); /* poll interval */
3155         /* EVENT_WRITE_EOP - flush caches, send int */
3156         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3157         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3158         radeon_ring_write(ring, addr & 0xffffffff);
3159         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3160         radeon_ring_write(ring, fence->seq);
3161         radeon_ring_write(ring, 0);
3162 }
3163
3164 /*
3165  * IB stuff
3166  */
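/*
 * si_ring_ib_execute - schedule an indirect buffer on the ring.
 *
 * Const IBs are preceded by a SWITCH_BUFFER packet and submitted with
 * INDIRECT_BUFFER_CONST.  Normal IBs first record the expected next
 * rptr (via the rptr save register or writeback memory), are submitted
 * with INDIRECT_BUFFER tagged with the VM id, and are followed by a
 * read cache flush for that vmid.
 */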
3167 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3168 {
3169         struct radeon_ring *ring = &rdev->ring[ib->ring];
3170         u32 header;
3171
3172         if (ib->is_const_ib) {
3173                 /* set switch buffer packet before const IB */
3174                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3175                 radeon_ring_write(ring, 0);
3176
3177                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3178         } else {
3179                 u32 next_rptr;
3180                 if (ring->rptr_save_reg) {
3181                         next_rptr = ring->wptr + 3 + 4 + 8;
3182                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3183                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3184                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3185                         radeon_ring_write(ring, next_rptr);
3186                 } else if (rdev->wb.enabled) {
3187                         next_rptr = ring->wptr + 5 + 4 + 8;
3188                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3189                         radeon_ring_write(ring, (1 << 8));
3190                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3191                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3192                         radeon_ring_write(ring, next_rptr);
3193                 }
3194
3195                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3196         }
3197
3198         radeon_ring_write(ring, header);
3199         radeon_ring_write(ring,
3200 #ifdef __BIG_ENDIAN
3201                           (2 << 0) |
3202 #endif
3203                           (ib->gpu_addr & 0xFFFFFFFC));
3204         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3205         radeon_ring_write(ring, ib->length_dw |
3206                           (ib->vm ? (ib->vm->id << 24) : 0));
3207
3208         if (!ib->is_const_ib) {
3209                 /* flush read cache over gart for this vmid */
3210                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3211                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3212                 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3213                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3214                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3215                                   PACKET3_TC_ACTION_ENA |
3216                                   PACKET3_SH_KCACHE_ACTION_ENA |
3217                                   PACKET3_SH_ICACHE_ACTION_ENA);
3218                 radeon_ring_write(ring, 0xFFFFFFFF);
3219                 radeon_ring_write(ring, 0);
3220                 radeon_ring_write(ring, 10); /* poll interval */
3221         }
3222 }
3223
3224 /*
3225  * CP.
3226  */
3227 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3228 {
3229         if (enable)
3230                 WREG32(CP_ME_CNTL, 0);
3231         else {
3232                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3233                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3234                 WREG32(SCRATCH_UMSK, 0);
3235                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3236                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3237                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3238         }
3239         udelay(50);
3240 }
3241
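/*
 * si_cp_load_microcode - load the PFP, CE and ME microcode.
 *
 * Halts the CP and writes each big-endian firmware image word by word
 * into the corresponding ucode/RAM data register, resetting the write
 * addresses before and after each upload.
 */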
3242 static int si_cp_load_microcode(struct radeon_device *rdev)
3243 {
3244         const __be32 *fw_data;
3245         int i;
3246
3247         if (!rdev->me_fw || !rdev->pfp_fw)
3248                 return -EINVAL;
3249
3250         si_cp_enable(rdev, false);
3251
3252         /* PFP */
3253         fw_data = (const __be32 *)rdev->pfp_fw->data;
3254         WREG32(CP_PFP_UCODE_ADDR, 0);
3255         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3256                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3257         WREG32(CP_PFP_UCODE_ADDR, 0);
3258
3259         /* CE */
3260         fw_data = (const __be32 *)rdev->ce_fw->data;
3261         WREG32(CP_CE_UCODE_ADDR, 0);
3262         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3263                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3264         WREG32(CP_CE_UCODE_ADDR, 0);
3265
3266         /* ME */
3267         fw_data = (const __be32 *)rdev->me_fw->data;
3268         WREG32(CP_ME_RAM_WADDR, 0);
3269         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3270                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3271         WREG32(CP_ME_RAM_WADDR, 0);
3272
3273         WREG32(CP_PFP_UCODE_ADDR, 0);
3274         WREG32(CP_CE_UCODE_ADDR, 0);
3275         WREG32(CP_ME_RAM_WADDR, 0);
3276         WREG32(CP_ME_RAM_RADDR, 0);
3277         return 0;
3278 }
3279
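/*
 * si_cp_start - bring up the command processors.
 *
 * Emits ME_INITIALIZE and the CE partition bases, enables the CP,
 * streams the default register state (si_default_state) between the
 * clear-state preamble markers, and clears the compute context state
 * on the two compute rings.
 */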
3280 static int si_cp_start(struct radeon_device *rdev)
3281 {
3282         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3283         int r, i;
3284
3285         r = radeon_ring_lock(rdev, ring, 7 + 4);
3286         if (r) {
3287                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3288                 return r;
3289         }
3290         /* init the CP */
3291         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3292         radeon_ring_write(ring, 0x1);
3293         radeon_ring_write(ring, 0x0);
3294         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3295         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3296         radeon_ring_write(ring, 0);
3297         radeon_ring_write(ring, 0);
3298
3299         /* init the CE partitions */
3300         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3301         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3302         radeon_ring_write(ring, 0xc000);
3303         radeon_ring_write(ring, 0xe000);
3304         radeon_ring_unlock_commit(rdev, ring);
3305
3306         si_cp_enable(rdev, true);
3307
3308         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3309         if (r) {
3310                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3311                 return r;
3312         }
3313
3314         /* setup clear context state */
3315         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3316         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3317
3318         for (i = 0; i < si_default_size; i++)
3319                 radeon_ring_write(ring, si_default_state[i]);
3320
3321         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3322         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3323
3324         /* set clear context state */
3325         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3326         radeon_ring_write(ring, 0);
3327
3328         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3329         radeon_ring_write(ring, 0x00000316);
3330         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3331         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3332
3333         radeon_ring_unlock_commit(rdev, ring);
3334
3335         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3336                 ring = &rdev->ring[i];
3337                 r = radeon_ring_lock(rdev, ring, 2);
                if (r) {
                        DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                        return r;
                }
3338
3339                 /* clear the compute context state */
3340                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3341                 radeon_ring_write(ring, 0);
3342
3343                 radeon_ring_unlock_commit(rdev, ring);
3344         }
3345
3346         return 0;
3347 }
3348
3349 static void si_cp_fini(struct radeon_device *rdev)
3350 {
3351         struct radeon_ring *ring;
3352         si_cp_enable(rdev, false);
3353
3354         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3355         radeon_ring_fini(rdev, ring);
3356         radeon_scratch_free(rdev, ring->rptr_save_reg);
3357
3358         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3359         radeon_ring_fini(rdev, ring);
3360         radeon_scratch_free(rdev, ring->rptr_save_reg);
3361
3362         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3363         radeon_ring_fini(rdev, ring);
3364         radeon_scratch_free(rdev, ring->rptr_save_reg);
3365 }
3366
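/*
 * si_cp_resume - program and start the three CP ring buffers.
 *
 * For the gfx ring and the two compute rings this sets the ring size
 * and rptr report block size in CP_RBn_CNTL, resets the write pointer,
 * points the rptr writeback at the WB buffer, programs the ring base,
 * then starts the CP and ring-tests each ring.
 */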
3367 static int si_cp_resume(struct radeon_device *rdev)
3368 {
3369         struct radeon_ring *ring;
3370         u32 tmp;
3371         u32 rb_bufsz;
3372         int r;
3373
3374         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3375         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3376
3377         /* Set the write pointer delay */
3378         WREG32(CP_RB_WPTR_DELAY, 0);
3379
3380         WREG32(CP_DEBUG, 0);
3381         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3382
3383         /* ring 0 - compute and gfx */
3384         /* Set ring buffer size */
3385         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3386         rb_bufsz = drm_order(ring->ring_size / 8);
3387         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3388 #ifdef __BIG_ENDIAN
3389         tmp |= BUF_SWAP_32BIT;
3390 #endif
3391         WREG32(CP_RB0_CNTL, tmp);
3392
3393         /* Initialize the ring buffer's read and write pointers */
3394         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3395         ring->wptr = 0;
3396         WREG32(CP_RB0_WPTR, ring->wptr);
3397
3398         /* set the wb address whether it's enabled or not */
3399         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3400         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3401
3402         if (rdev->wb.enabled)
3403                 WREG32(SCRATCH_UMSK, 0xff);
3404         else {
3405                 tmp |= RB_NO_UPDATE;
3406                 WREG32(SCRATCH_UMSK, 0);
3407         }
3408
3409         mdelay(1);
3410         WREG32(CP_RB0_CNTL, tmp);
3411
3412         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3413
3414         ring->rptr = RREG32(CP_RB0_RPTR);
3415
3416         /* ring1  - compute only */
3417         /* Set ring buffer size */
3418         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3419         rb_bufsz = drm_order(ring->ring_size / 8);
3420         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3421 #ifdef __BIG_ENDIAN
3422         tmp |= BUF_SWAP_32BIT;
3423 #endif
3424         WREG32(CP_RB1_CNTL, tmp);
3425
3426         /* Initialize the ring buffer's read and write pointers */
3427         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3428         ring->wptr = 0;
3429         WREG32(CP_RB1_WPTR, ring->wptr);
3430
3431         /* set the wb address whether it's enabled or not */
3432         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3433         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3434
3435         mdelay(1);
3436         WREG32(CP_RB1_CNTL, tmp);
3437
3438         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3439
3440         ring->rptr = RREG32(CP_RB1_RPTR);
3441
3442         /* ring2 - compute only */
3443         /* Set ring buffer size */
3444         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3445         rb_bufsz = drm_order(ring->ring_size / 8);
3446         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3447 #ifdef __BIG_ENDIAN
3448         tmp |= BUF_SWAP_32BIT;
3449 #endif
3450         WREG32(CP_RB2_CNTL, tmp);
3451
3452         /* Initialize the ring buffer's read and write pointers */
3453         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3454         ring->wptr = 0;
3455         WREG32(CP_RB2_WPTR, ring->wptr);
3456
3457         /* set the wb address whether it's enabled or not */
3458         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3459         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3460
3461         mdelay(1);
3462         WREG32(CP_RB2_CNTL, tmp);
3463
3464         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3465
3466         ring->rptr = RREG32(CP_RB2_RPTR);
3467
3468         /* start the rings */
3469         si_cp_start(rdev);
3470         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3471         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3472         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3473         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3474         if (r) {
3475                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3476                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3477                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3478                 return r;
3479         }
3480         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3481         if (r) {
3482                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3483         }
3484         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3485         if (r) {
3486                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3487         }
3488
3489         return 0;
3490 }
3491
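/*
 * si_gpu_check_soft_reset - determine which blocks appear hung.
 *
 * Samples the GRBM, SRBM, DMA and VM_L2 status registers and turns the
 * busy bits into RADEON_RESET_* flags.  An MC busy indication is
 * dropped again, since it most likely means the memory controller is
 * merely busy rather than hung.
 */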
3492 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3493 {
3494         u32 reset_mask = 0;
3495         u32 tmp;
3496
3497         /* GRBM_STATUS */
3498         tmp = RREG32(GRBM_STATUS);
3499         if (tmp & (PA_BUSY | SC_BUSY |
3500                    BCI_BUSY | SX_BUSY |
3501                    TA_BUSY | VGT_BUSY |
3502                    DB_BUSY | CB_BUSY |
3503                    GDS_BUSY | SPI_BUSY |
3504                    IA_BUSY | IA_BUSY_NO_DMA))
3505                 reset_mask |= RADEON_RESET_GFX;
3506
3507         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3508                    CP_BUSY | CP_COHERENCY_BUSY))
3509                 reset_mask |= RADEON_RESET_CP;
3510
3511         if (tmp & GRBM_EE_BUSY)
3512                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3513
3514         /* GRBM_STATUS2 */
3515         tmp = RREG32(GRBM_STATUS2);
3516         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3517                 reset_mask |= RADEON_RESET_RLC;
3518
3519         /* DMA_STATUS_REG 0 */
3520         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3521         if (!(tmp & DMA_IDLE))
3522                 reset_mask |= RADEON_RESET_DMA;
3523
3524         /* DMA_STATUS_REG 1 */
3525         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3526         if (!(tmp & DMA_IDLE))
3527                 reset_mask |= RADEON_RESET_DMA1;
3528
3529         /* SRBM_STATUS2 */
3530         tmp = RREG32(SRBM_STATUS2);
3531         if (tmp & DMA_BUSY)
3532                 reset_mask |= RADEON_RESET_DMA;
3533
3534         if (tmp & DMA1_BUSY)
3535                 reset_mask |= RADEON_RESET_DMA1;
3536
3537         /* SRBM_STATUS */
3538         tmp = RREG32(SRBM_STATUS);
3539
3540         if (tmp & IH_BUSY)
3541                 reset_mask |= RADEON_RESET_IH;
3542
3543         if (tmp & SEM_BUSY)
3544                 reset_mask |= RADEON_RESET_SEM;
3545
3546         if (tmp & GRBM_RQ_PENDING)
3547                 reset_mask |= RADEON_RESET_GRBM;
3548
3549         if (tmp & VMC_BUSY)
3550                 reset_mask |= RADEON_RESET_VMC;
3551
3552         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3553                    MCC_BUSY | MCD_BUSY))
3554                 reset_mask |= RADEON_RESET_MC;
3555
3556         if (evergreen_is_display_hung(rdev))
3557                 reset_mask |= RADEON_RESET_DISPLAY;
3558
3559         /* VM_L2_STATUS */
3560         tmp = RREG32(VM_L2_STATUS);
3561         if (tmp & L2_BUSY)
3562                 reset_mask |= RADEON_RESET_VMC;
3563
3564         /* Skip MC reset as it's most likely not hung, just busy */
3565         if (reset_mask & RADEON_RESET_MC) {
3566                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3567                 reset_mask &= ~RADEON_RESET_MC;
3568         }
3569
3570         return reset_mask;
3571 }
3572
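/*
 * si_gpu_soft_reset - soft reset the blocks named in reset_mask.
 *
 * Dumps the status registers, halts CP parsing and the DMA engines,
 * stops the memory controller, then pulses the corresponding
 * GRBM_SOFT_RESET and SRBM_SOFT_RESET bits.
 */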
3573 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3574 {
3575         struct evergreen_mc_save save;
3576         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3577         u32 tmp;
3578
3579         if (reset_mask == 0)
3580                 return;
3581
3582         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3583
3584         evergreen_print_gpu_status_regs(rdev);
3585         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3586                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3587         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3588                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3589
3590         /* Disable CP parsing/prefetching */
3591         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3592
3593         if (reset_mask & RADEON_RESET_DMA) {
3594                 /* dma0 */
3595                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3596                 tmp &= ~DMA_RB_ENABLE;
3597                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3598         }
3599         if (reset_mask & RADEON_RESET_DMA1) {
3600                 /* dma1 */
3601                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3602                 tmp &= ~DMA_RB_ENABLE;
3603                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3604         }
3605
3606         udelay(50);
3607
3608         evergreen_mc_stop(rdev, &save);
3609         if (evergreen_mc_wait_for_idle(rdev)) {
3610                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3611         }
3612
3613         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3614                 grbm_soft_reset = SOFT_RESET_CB |
3615                         SOFT_RESET_DB |
3616                         SOFT_RESET_GDS |
3617                         SOFT_RESET_PA |
3618                         SOFT_RESET_SC |
3619                         SOFT_RESET_BCI |
3620                         SOFT_RESET_SPI |
3621                         SOFT_RESET_SX |
3622                         SOFT_RESET_TC |
3623                         SOFT_RESET_TA |
3624                         SOFT_RESET_VGT |
3625                         SOFT_RESET_IA;
3626         }
3627
3628         if (reset_mask & RADEON_RESET_CP) {
3629                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3630
3631                 srbm_soft_reset |= SOFT_RESET_GRBM;
3632         }
3633
3634         if (reset_mask & RADEON_RESET_DMA)
3635                 srbm_soft_reset |= SOFT_RESET_DMA;
3636
3637         if (reset_mask & RADEON_RESET_DMA1)
3638                 srbm_soft_reset |= SOFT_RESET_DMA1;
3639
3640         if (reset_mask & RADEON_RESET_DISPLAY)
3641                 srbm_soft_reset |= SOFT_RESET_DC;
3642
3643         if (reset_mask & RADEON_RESET_RLC)
3644                 grbm_soft_reset |= SOFT_RESET_RLC;
3645
3646         if (reset_mask & RADEON_RESET_SEM)
3647                 srbm_soft_reset |= SOFT_RESET_SEM;
3648
3649         if (reset_mask & RADEON_RESET_IH)
3650                 srbm_soft_reset |= SOFT_RESET_IH;
3651
3652         if (reset_mask & RADEON_RESET_GRBM)
3653                 srbm_soft_reset |= SOFT_RESET_GRBM;
3654
3655         if (reset_mask & RADEON_RESET_VMC)
3656                 srbm_soft_reset |= SOFT_RESET_VMC;
3657
3658         if (reset_mask & RADEON_RESET_MC)
3659                 srbm_soft_reset |= SOFT_RESET_MC;
3660
3661         if (grbm_soft_reset) {
3662                 tmp = RREG32(GRBM_SOFT_RESET);
3663                 tmp |= grbm_soft_reset;
3664                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3665                 WREG32(GRBM_SOFT_RESET, tmp);
3666                 tmp = RREG32(GRBM_SOFT_RESET);
3667
3668                 udelay(50);
3669
3670                 tmp &= ~grbm_soft_reset;
3671                 WREG32(GRBM_SOFT_RESET, tmp);
3672                 tmp = RREG32(GRBM_SOFT_RESET);
3673         }
3674
3675         if (srbm_soft_reset) {
3676                 tmp = RREG32(SRBM_SOFT_RESET);
3677                 tmp |= srbm_soft_reset;
3678                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3679                 WREG32(SRBM_SOFT_RESET, tmp);
3680                 tmp = RREG32(SRBM_SOFT_RESET);
3681
3682                 udelay(50);
3683
3684                 tmp &= ~srbm_soft_reset;
3685                 WREG32(SRBM_SOFT_RESET, tmp);
3686                 tmp = RREG32(SRBM_SOFT_RESET);
3687         }
3688
3689         /* Wait a little for things to settle down */
3690         udelay(50);
3691
3692         evergreen_mc_resume(rdev, &save);
3693         udelay(50);
3694
3695         evergreen_print_gpu_status_regs(rdev);
3696 }
3697
3698 int si_asic_reset(struct radeon_device *rdev)
3699 {
3700         u32 reset_mask;
3701
3702         reset_mask = si_gpu_check_soft_reset(rdev);
3703
3704         if (reset_mask)
3705                 r600_set_bios_scratch_engine_hung(rdev, true);
3706
3707         si_gpu_soft_reset(rdev, reset_mask);
3708
3709         reset_mask = si_gpu_check_soft_reset(rdev);
3710
3711         if (!reset_mask)
3712                 r600_set_bios_scratch_engine_hung(rdev, false);
3713
3714         return 0;
3715 }
3716
3717 /**
3718  * si_gfx_is_lockup - Check if the GFX engine is locked up
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ring: radeon_ring structure holding ring information
3722  *
3723  * Check if the GFX engine is locked up.
3724  * Returns true if the engine appears to be locked up, false if not.
3725  */
3726 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3727 {
3728         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3729
3730         if (!(reset_mask & (RADEON_RESET_GFX |
3731                             RADEON_RESET_COMPUTE |
3732                             RADEON_RESET_CP))) {
3733                 radeon_ring_lockup_update(ring);
3734                 return false;
3735         }
3736         /* force CP activities */
3737         radeon_ring_force_activity(rdev, ring);
3738         return radeon_ring_test_lockup(rdev, ring);
3739 }
3740
3741 /* MC */
3742 static void si_mc_program(struct radeon_device *rdev)
3743 {
3744         struct evergreen_mc_save save;
3745         u32 tmp;
3746         int i, j;
3747
3748         /* Initialize HDP */
3749         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3750                 WREG32((0x2c14 + j), 0x00000000);
3751                 WREG32((0x2c18 + j), 0x00000000);
3752                 WREG32((0x2c1c + j), 0x00000000);
3753                 WREG32((0x2c20 + j), 0x00000000);
3754                 WREG32((0x2c24 + j), 0x00000000);
3755         }
3756         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3757
3758         evergreen_mc_stop(rdev, &save);
3759         if (radeon_mc_wait_for_idle(rdev)) {
3760                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3761         }
3762         if (!ASIC_IS_NODCE(rdev))
3763                 /* Lock out access through VGA aperture */
3764                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3765         /* Update configuration */
3766         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3767                rdev->mc.vram_start >> 12);
3768         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3769                rdev->mc.vram_end >> 12);
3770         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3771                rdev->vram_scratch.gpu_addr >> 12);
3772         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3773         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3774         WREG32(MC_VM_FB_LOCATION, tmp);
3775         /* XXX double check these! */
3776         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3777         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3778         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3779         WREG32(MC_VM_AGP_BASE, 0);
3780         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3781         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3782         if (radeon_mc_wait_for_idle(rdev)) {
3783                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3784         }
3785         evergreen_mc_resume(rdev, &save);
3786         if (!ASIC_IS_NODCE(rdev)) {
3787                 /* we need to own VRAM, so turn off the VGA renderer here
3788                  * to stop it overwriting our objects */
3789                 rv515_vga_render_disable(rdev);
3790         }
3791 }
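/* Editor's note, not part of the original file: a worked example of the
 * MC_VM_FB_LOCATION packing programmed in si_mc_program() above, assuming a
 * hypothetical board with vram_start = 0 and 2 GiB of VRAM.  The register
 * describes the framebuffer aperture in 16 MiB units: bits 31:16 hold the
 * top, bits 15:0 the base.
 *
 *   vram_end = vram_start + (2ULL << 30) - 1;         // 0x7FFFFFFF
 *   tmp  = ((vram_end   >> 24) & 0xFFFF) << 16;       // 0x007F0000
 *   tmp |=  (vram_start >> 24) & 0xFFFF;              // still 0x007F0000
 *   WREG32(MC_VM_FB_LOCATION, tmp);
 */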
3792
3793 void si_vram_gtt_location(struct radeon_device *rdev,
3794                           struct radeon_mc *mc)
3795 {
3796         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3797                 /* leave room for at least 1024M GTT */
3798                 dev_warn(rdev->dev, "limiting VRAM\n");
3799                 mc->real_vram_size = 0xFFC0000000ULL;
3800                 mc->mc_vram_size = 0xFFC0000000ULL;
3801         }
3802         radeon_vram_location(rdev, &rdev->mc, 0);
3803         rdev->mc.gtt_base_align = 0;
3804         radeon_gtt_location(rdev, mc);
3805 }
3806
3807 static int si_mc_init(struct radeon_device *rdev)
3808 {
3809         u32 tmp;
3810         int chansize, numchan;
3811
3812         /* Get VRAM information */
3813         rdev->mc.vram_is_ddr = true;
3814         tmp = RREG32(MC_ARB_RAMCFG);
3815         if (tmp & CHANSIZE_OVERRIDE) {
3816                 chansize = 16;
3817         } else if (tmp & CHANSIZE_MASK) {
3818                 chansize = 64;
3819         } else {
3820                 chansize = 32;
3821         }
3822         tmp = RREG32(MC_SHARED_CHMAP);
3823         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3824         case 0:
3825         default:
3826                 numchan = 1;
3827                 break;
3828         case 1:
3829                 numchan = 2;
3830                 break;
3831         case 2:
3832                 numchan = 4;
3833                 break;
3834         case 3:
3835                 numchan = 8;
3836                 break;
3837         case 4:
3838                 numchan = 3;
3839                 break;
3840         case 5:
3841                 numchan = 6;
3842                 break;
3843         case 6:
3844                 numchan = 10;
3845                 break;
3846         case 7:
3847                 numchan = 12;
3848                 break;
3849         case 8:
3850                 numchan = 16;
3851                 break;
3852         }
3853         rdev->mc.vram_width = numchan * chansize;
3854         /* Could aper size report 0? */
3855         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3856         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3857         /* size in MB on si */
3858         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3859         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3860         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3861         si_vram_gtt_location(rdev, &rdev->mc);
3862         radeon_update_bandwidth_info(rdev);
3863
3864         return 0;
3865 }
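/* Editor's note, not part of the original file: the VRAM bus width computed
 * above is simply numchan * chansize.  As an illustration (hypothetical
 * values, not read from hardware): a board whose NOOFCHAN field decodes to
 * 12 channels with a 32-bit channel size reports vram_width = 12 * 32 = 384
 * bits, while 8 channels of 64 bits would report 512 bits.
 */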
3866
3867 /*
3868  * GART
3869  */
3870 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3871 {
3872         /* flush hdp cache */
3873         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3874
3875         /* bits 0-15 are the VM contexts 0-15 */
3876         WREG32(VM_INVALIDATE_REQUEST, 1);
3877 }
3878
3879 static int si_pcie_gart_enable(struct radeon_device *rdev)
3880 {
3881         int r, i;
3882
3883         if (rdev->gart.robj == NULL) {
3884                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3885                 return -EINVAL;
3886         }
3887         r = radeon_gart_table_vram_pin(rdev);
3888         if (r)
3889                 return r;
3890         radeon_gart_restore(rdev);
3891         /* Setup TLB control */
3892         WREG32(MC_VM_MX_L1_TLB_CNTL,
3893                (0xA << 7) |
3894                ENABLE_L1_TLB |
3895                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3896                ENABLE_ADVANCED_DRIVER_MODEL |
3897                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3898         /* Setup L2 cache */
3899         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3900                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3901                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3902                EFFECTIVE_L2_QUEUE_SIZE(7) |
3903                CONTEXT1_IDENTITY_ACCESS_MODE(1));
3904         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3905         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3906                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3907         /* setup context0 */
3908         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3909         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3910         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3911         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3912                         (u32)(rdev->dummy_page.addr >> 12));
3913         WREG32(VM_CONTEXT0_CNTL2, 0);
3914         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3915                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3916
3917         WREG32(0x15D4, 0);
3918         WREG32(0x15D8, 0);
3919         WREG32(0x15DC, 0);
3920
3921         /* empty contexts 1-15 */
3922         /* set vm size, must be a multiple of 4 */
3923         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3924         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3925         /* Assign the pt base to something valid for now; the page tables used
3926          * by the VMs are determined by the applications and are set up and
3927          * assigned on the fly in the vm part of radeon_gart.c
3928          */
3929         for (i = 1; i < 16; i++) {
3930                 if (i < 8)
3931                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3932                                rdev->gart.table_addr >> 12);
3933                 else
3934                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3935                                rdev->gart.table_addr >> 12);
3936         }
3937
3938         /* enable contexts 1-15 */
3939         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3940                (u32)(rdev->dummy_page.addr >> 12));
3941         WREG32(VM_CONTEXT1_CNTL2, 4);
3942         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3943                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3944                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3945                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3946                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3947                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3948                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3949                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3950                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3951                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3952                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3953                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3954                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3955
3956         si_pcie_gart_tlb_flush(rdev);
3957         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3958                  (unsigned)(rdev->mc.gtt_size >> 20),
3959                  (unsigned long long)rdev->gart.table_addr);
3960         rdev->gart.ready = true;
3961         return 0;
3962 }
3963
3964 static void si_pcie_gart_disable(struct radeon_device *rdev)
3965 {
3966         /* Disable all tables */
3967         WREG32(VM_CONTEXT0_CNTL, 0);
3968         WREG32(VM_CONTEXT1_CNTL, 0);
3969         /* Setup TLB control */
3970         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3971                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3972         /* Setup L2 cache */
3973         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3974                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3975                EFFECTIVE_L2_QUEUE_SIZE(7) |
3976                CONTEXT1_IDENTITY_ACCESS_MODE(1));
3977         WREG32(VM_L2_CNTL2, 0);
3978         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3979                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3980         radeon_gart_table_vram_unpin(rdev);
3981 }
3982
3983 static void si_pcie_gart_fini(struct radeon_device *rdev)
3984 {
3985         si_pcie_gart_disable(rdev);
3986         radeon_gart_table_vram_free(rdev);
3987         radeon_gart_fini(rdev);
3988 }
3989
3990 /* vm parser */
3991 static bool si_vm_reg_valid(u32 reg)
3992 {
3993         /* context regs are fine */
3994         if (reg >= 0x28000)
3995                 return true;
3996
3997         /* check config regs */
3998         switch (reg) {
3999         case GRBM_GFX_INDEX:
4000         case CP_STRMOUT_CNTL:
4001         case VGT_VTX_VECT_EJECT_REG:
4002         case VGT_CACHE_INVALIDATION:
4003         case VGT_ESGS_RING_SIZE:
4004         case VGT_GSVS_RING_SIZE:
4005         case VGT_GS_VERTEX_REUSE:
4006         case VGT_PRIMITIVE_TYPE:
4007         case VGT_INDEX_TYPE:
4008         case VGT_NUM_INDICES:
4009         case VGT_NUM_INSTANCES:
4010         case VGT_TF_RING_SIZE:
4011         case VGT_HS_OFFCHIP_PARAM:
4012         case VGT_TF_MEMORY_BASE:
4013         case PA_CL_ENHANCE:
4014         case PA_SU_LINE_STIPPLE_VALUE:
4015         case PA_SC_LINE_STIPPLE_STATE:
4016         case PA_SC_ENHANCE:
4017         case SQC_CACHES:
4018         case SPI_STATIC_THREAD_MGMT_1:
4019         case SPI_STATIC_THREAD_MGMT_2:
4020         case SPI_STATIC_THREAD_MGMT_3:
4021         case SPI_PS_MAX_WAVE_ID:
4022         case SPI_CONFIG_CNTL:
4023         case SPI_CONFIG_CNTL_1:
4024         case TA_CNTL_AUX:
4025                 return true;
4026         default:
4027                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4028                 return false;
4029         }
4030 }
4031
4032 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4033                                   u32 *ib, struct radeon_cs_packet *pkt)
4034 {
4035         switch (pkt->opcode) {
4036         case PACKET3_NOP:
4037         case PACKET3_SET_BASE:
4038         case PACKET3_SET_CE_DE_COUNTERS:
4039         case PACKET3_LOAD_CONST_RAM:
4040         case PACKET3_WRITE_CONST_RAM:
4041         case PACKET3_WRITE_CONST_RAM_OFFSET:
4042         case PACKET3_DUMP_CONST_RAM:
4043         case PACKET3_INCREMENT_CE_COUNTER:
4044         case PACKET3_WAIT_ON_DE_COUNTER:
4045         case PACKET3_CE_WRITE:
4046                 break;
4047         default:
4048                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4049                 return -EINVAL;
4050         }
4051         return 0;
4052 }
4053
4054 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4055 {
4056         u32 start_reg, reg, i;
4057         u32 command = ib[idx + 4];
4058         u32 info = ib[idx + 1];
4059         u32 idx_value = ib[idx];
4060         if (command & PACKET3_CP_DMA_CMD_SAS) {
4061                 /* src address space is register */
4062                 if (((info & 0x60000000) >> 29) == 0) {
4063                         start_reg = idx_value << 2;
4064                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4065                                 reg = start_reg;
4066                                 if (!si_vm_reg_valid(reg)) {
4067                                         DRM_ERROR("CP DMA Bad SRC register\n");
4068                                         return -EINVAL;
4069                                 }
4070                         } else {
4071                                 for (i = 0; i < (command & 0x1fffff); i++) {
4072                                         reg = start_reg + (4 * i);
4073                                         if (!si_vm_reg_valid(reg)) {
4074                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4075                                                 return -EINVAL;
4076                                         }
4077                                 }
4078                         }
4079                 }
4080         }
4081         if (command & PACKET3_CP_DMA_CMD_DAS) {
4082                 /* dst address space is register */
4083                 if (((info & 0x00300000) >> 20) == 0) {
4084                         start_reg = ib[idx + 2];
4085                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4086                                 reg = start_reg;
4087                                 if (!si_vm_reg_valid(reg)) {
4088                                         DRM_ERROR("CP DMA Bad DST register\n");
4089                                         return -EINVAL;
4090                                 }
4091                         } else {
4092                                 for (i = 0; i < (command & 0x1fffff); i++) {
4093                                         reg = start_reg + (4 * i);
4094                                         if (!si_vm_reg_valid(reg)) {
4095                                                 DRM_ERROR("CP DMA Bad DST register\n");
4096                                                 return -EINVAL;
4097                                         }
4098                                 }
4099                         }
4100                 }
4101         }
4102         return 0;
4103 }
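/* Editor's note, not part of the original file: a sketch of the CP_DMA
 * fields the checker above relies on, described strictly as the code uses
 * them.  ib[idx] (idx_value) is taken as a register dword offset when the
 * source address space selects "register"; ib[idx + 1] carries the address
 * space selects (bits 30:29 for the source, bits 21:20 for the destination,
 * 0 meaning register); ib[idx + 2] supplies the destination register offset;
 * and ib[idx + 4] is the command word whose low 21 bits hold the transfer
 * count and whose SAS/DAS/SAIC/DAIC bits drive the branches above.
 */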
4104
4105 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4106                                    u32 *ib, struct radeon_cs_packet *pkt)
4107 {
4108         int r;
4109         u32 idx = pkt->idx + 1;
4110         u32 idx_value = ib[idx];
4111         u32 start_reg, end_reg, reg, i;
4112
4113         switch (pkt->opcode) {
4114         case PACKET3_NOP:
4115         case PACKET3_SET_BASE:
4116         case PACKET3_CLEAR_STATE:
4117         case PACKET3_INDEX_BUFFER_SIZE:
4118         case PACKET3_DISPATCH_DIRECT:
4119         case PACKET3_DISPATCH_INDIRECT:
4120         case PACKET3_ALLOC_GDS:
4121         case PACKET3_WRITE_GDS_RAM:
4122         case PACKET3_ATOMIC_GDS:
4123         case PACKET3_ATOMIC:
4124         case PACKET3_OCCLUSION_QUERY:
4125         case PACKET3_SET_PREDICATION:
4126         case PACKET3_COND_EXEC:
4127         case PACKET3_PRED_EXEC:
4128         case PACKET3_DRAW_INDIRECT:
4129         case PACKET3_DRAW_INDEX_INDIRECT:
4130         case PACKET3_INDEX_BASE:
4131         case PACKET3_DRAW_INDEX_2:
4132         case PACKET3_CONTEXT_CONTROL:
4133         case PACKET3_INDEX_TYPE:
4134         case PACKET3_DRAW_INDIRECT_MULTI:
4135         case PACKET3_DRAW_INDEX_AUTO:
4136         case PACKET3_DRAW_INDEX_IMMD:
4137         case PACKET3_NUM_INSTANCES:
4138         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4139         case PACKET3_STRMOUT_BUFFER_UPDATE:
4140         case PACKET3_DRAW_INDEX_OFFSET_2:
4141         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4142         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4143         case PACKET3_MPEG_INDEX:
4144         case PACKET3_WAIT_REG_MEM:
4145         case PACKET3_MEM_WRITE:
4146         case PACKET3_PFP_SYNC_ME:
4147         case PACKET3_SURFACE_SYNC:
4148         case PACKET3_EVENT_WRITE:
4149         case PACKET3_EVENT_WRITE_EOP:
4150         case PACKET3_EVENT_WRITE_EOS:
4151         case PACKET3_SET_CONTEXT_REG:
4152         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4153         case PACKET3_SET_SH_REG:
4154         case PACKET3_SET_SH_REG_OFFSET:
4155         case PACKET3_INCREMENT_DE_COUNTER:
4156         case PACKET3_WAIT_ON_CE_COUNTER:
4157         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4158         case PACKET3_ME_WRITE:
4159                 break;
4160         case PACKET3_COPY_DATA:
4161                 if ((idx_value & 0xf00) == 0) {
4162                         reg = ib[idx + 3] * 4;
4163                         if (!si_vm_reg_valid(reg))
4164                                 return -EINVAL;
4165                 }
4166                 break;
4167         case PACKET3_WRITE_DATA:
4168                 if ((idx_value & 0xf00) == 0) {
4169                         start_reg = ib[idx + 1] * 4;
4170                         if (idx_value & 0x10000) {
4171                                 if (!si_vm_reg_valid(start_reg))
4172                                         return -EINVAL;
4173                         } else {
4174                                 for (i = 0; i < (pkt->count - 2); i++) {
4175                                         reg = start_reg + (4 * i);
4176                                         if (!si_vm_reg_valid(reg))
4177                                                 return -EINVAL;
4178                                 }
4179                         }
4180                 }
4181                 break;
4182         case PACKET3_COND_WRITE:
4183                 if (idx_value & 0x100) {
4184                         reg = ib[idx + 5] * 4;
4185                         if (!si_vm_reg_valid(reg))
4186                                 return -EINVAL;
4187                 }
4188                 break;
4189         case PACKET3_COPY_DW:
4190                 if (idx_value & 0x2) {
4191                         reg = ib[idx + 3] * 4;
4192                         if (!si_vm_reg_valid(reg))
4193                                 return -EINVAL;
4194                 }
4195                 break;
4196         case PACKET3_SET_CONFIG_REG:
4197                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4198                 end_reg = 4 * pkt->count + start_reg - 4;
4199                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4200                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4201                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4202                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4203                         return -EINVAL;
4204                 }
4205                 for (i = 0; i < pkt->count; i++) {
4206                         reg = start_reg + (4 * i);
4207                         if (!si_vm_reg_valid(reg))
4208                                 return -EINVAL;
4209                 }
4210                 break;
4211         case PACKET3_CP_DMA:
4212                 r = si_vm_packet3_cp_dma_check(ib, idx);
4213                 if (r)
4214                         return r;
4215                 break;
4216         default:
4217                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4218                 return -EINVAL;
4219         }
4220         return 0;
4221 }
4222
4223 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4224                                        u32 *ib, struct radeon_cs_packet *pkt)
4225 {
4226         int r;
4227         u32 idx = pkt->idx + 1;
4228         u32 idx_value = ib[idx];
4229         u32 start_reg, reg, i;
4230
4231         switch (pkt->opcode) {
4232         case PACKET3_NOP:
4233         case PACKET3_SET_BASE:
4234         case PACKET3_CLEAR_STATE:
4235         case PACKET3_DISPATCH_DIRECT:
4236         case PACKET3_DISPATCH_INDIRECT:
4237         case PACKET3_ALLOC_GDS:
4238         case PACKET3_WRITE_GDS_RAM:
4239         case PACKET3_ATOMIC_GDS:
4240         case PACKET3_ATOMIC:
4241         case PACKET3_OCCLUSION_QUERY:
4242         case PACKET3_SET_PREDICATION:
4243         case PACKET3_COND_EXEC:
4244         case PACKET3_PRED_EXEC:
4245         case PACKET3_CONTEXT_CONTROL:
4246         case PACKET3_STRMOUT_BUFFER_UPDATE:
4247         case PACKET3_WAIT_REG_MEM:
4248         case PACKET3_MEM_WRITE:
4249         case PACKET3_PFP_SYNC_ME:
4250         case PACKET3_SURFACE_SYNC:
4251         case PACKET3_EVENT_WRITE:
4252         case PACKET3_EVENT_WRITE_EOP:
4253         case PACKET3_EVENT_WRITE_EOS:
4254         case PACKET3_SET_CONTEXT_REG:
4255         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4256         case PACKET3_SET_SH_REG:
4257         case PACKET3_SET_SH_REG_OFFSET:
4258         case PACKET3_INCREMENT_DE_COUNTER:
4259         case PACKET3_WAIT_ON_CE_COUNTER:
4260         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4261         case PACKET3_ME_WRITE:
4262                 break;
4263         case PACKET3_COPY_DATA:
4264                 if ((idx_value & 0xf00) == 0) {
4265                         reg = ib[idx + 3] * 4;
4266                         if (!si_vm_reg_valid(reg))
4267                                 return -EINVAL;
4268                 }
4269                 break;
4270         case PACKET3_WRITE_DATA:
4271                 if ((idx_value & 0xf00) == 0) {
4272                         start_reg = ib[idx + 1] * 4;
4273                         if (idx_value & 0x10000) {
4274                                 if (!si_vm_reg_valid(start_reg))
4275                                         return -EINVAL;
4276                         } else {
4277                                 for (i = 0; i < (pkt->count - 2); i++) {
4278                                         reg = start_reg + (4 * i);
4279                                         if (!si_vm_reg_valid(reg))
4280                                                 return -EINVAL;
4281                                 }
4282                         }
4283                 }
4284                 break;
4285         case PACKET3_COND_WRITE:
4286                 if (idx_value & 0x100) {
4287                         reg = ib[idx + 5] * 4;
4288                         if (!si_vm_reg_valid(reg))
4289                                 return -EINVAL;
4290                 }
4291                 break;
4292         case PACKET3_COPY_DW:
4293                 if (idx_value & 0x2) {
4294                         reg = ib[idx + 3] * 4;
4295                         if (!si_vm_reg_valid(reg))
4296                                 return -EINVAL;
4297                 }
4298                 break;
4299         case PACKET3_CP_DMA:
4300                 r = si_vm_packet3_cp_dma_check(ib, idx);
4301                 if (r)
4302                         return r;
4303                 break;
4304         default:
4305                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4306                 return -EINVAL;
4307         }
4308         return 0;
4309 }
4310
4311 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4312 {
4313         int ret = 0;
4314         u32 idx = 0;
4315         struct radeon_cs_packet pkt;
4316
4317         do {
4318                 pkt.idx = idx;
4319                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4320                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4321                 pkt.one_reg_wr = 0;
4322                 switch (pkt.type) {
4323                 case RADEON_PACKET_TYPE0:
4324                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4325                         ret = -EINVAL;
4326                         break;
4327                 case RADEON_PACKET_TYPE2:
4328                         idx += 1;
4329                         break;
4330                 case RADEON_PACKET_TYPE3:
4331                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4332                         if (ib->is_const_ib)
4333                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4334                         else {
4335                                 switch (ib->ring) {
4336                                 case RADEON_RING_TYPE_GFX_INDEX:
4337                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4338                                         break;
4339                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4340                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4341                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4342                                         break;
4343                                 default:
4344                                         dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4345                                         ret = -EINVAL;
4346                                         break;
4347                                 }
4348                         }
4349                         idx += pkt.count + 2;
4350                         break;
4351                 default:
4352                         dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4353                         ret = -EINVAL;
4354                         break;
4355                 }
4356                 if (ret)
4357                         break;
4358         } while (idx < ib->length_dw);
4359
4360         return ret;
4361 }
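/* Editor's note, not part of the original file: a short sketch of the PM4
 * header layout the parser above depends on, assuming the standard radeon
 * PM4 encoding behind the RADEON_CP_PACKET_GET_* macros.  Bits 31:30 of the
 * header dword give the packet type, bits 29:16 the count field and, for
 * type-3 packets, bits 15:8 the opcode.  A type-3 packet occupies the header
 * plus (count + 1) body dwords, which is why the loop advances idx by
 * pkt.count + 2, while a type-2 packet is a single filler dword (idx += 1).
 */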
4362
4363 /*
4364  * vm
4365  */
4366 int si_vm_init(struct radeon_device *rdev)
4367 {
4368         /* number of VMs */
4369         rdev->vm_manager.nvm = 16;
4370         /* base offset of vram pages */
4371         rdev->vm_manager.vram_base_offset = 0;
4372
4373         return 0;
4374 }
4375
4376 void si_vm_fini(struct radeon_device *rdev)
4377 {
4378 }
4379
4380 /**
4381  * si_vm_decode_fault - print human readable fault info
4382  *
4383  * @rdev: radeon_device pointer
4384  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4385  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4386  *
4387  * Print human readable fault information (SI).
4388  */
4389 static void si_vm_decode_fault(struct radeon_device *rdev,
4390                                u32 status, u32 addr)
4391 {
4392         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4393         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4394         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4395         char *block;
4396
4397         if (rdev->family == CHIP_TAHITI) {
4398                 switch (mc_id) {
4399                 case 160:
4400                 case 144:
4401                 case 96:
4402                 case 80:
4403                 case 224:
4404                 case 208:
4405                 case 32:
4406                 case 16:
4407                         block = "CB";
4408                         break;
4409                 case 161:
4410                 case 145:
4411                 case 97:
4412                 case 81:
4413                 case 225:
4414                 case 209:
4415                 case 33:
4416                 case 17:
4417                         block = "CB_FMASK";
4418                         break;
4419                 case 162:
4420                 case 146:
4421                 case 98:
4422                 case 82:
4423                 case 226:
4424                 case 210:
4425                 case 34:
4426                 case 18:
4427                         block = "CB_CMASK";
4428                         break;
4429                 case 163:
4430                 case 147:
4431                 case 99:
4432                 case 83:
4433                 case 227:
4434                 case 211:
4435                 case 35:
4436                 case 19:
4437                         block = "CB_IMMED";
4438                         break;
4439                 case 164:
4440                 case 148:
4441                 case 100:
4442                 case 84:
4443                 case 228:
4444                 case 212:
4445                 case 36:
4446                 case 20:
4447                         block = "DB";
4448                         break;
4449                 case 165:
4450                 case 149:
4451                 case 101:
4452                 case 85:
4453                 case 229:
4454                 case 213:
4455                 case 37:
4456                 case 21:
4457                         block = "DB_HTILE";
4458                         break;
4459                 case 167:
4460                 case 151:
4461                 case 103:
4462                 case 87:
4463                 case 231:
4464                 case 215:
4465                 case 39:
4466                 case 23:
4467                         block = "DB_STEN";
4468                         break;
4469                 case 72:
4470                 case 68:
4471                 case 64:
4472                 case 8:
4473                 case 4:
4474                 case 0:
4475                 case 136:
4476                 case 132:
4477                 case 128:
4478                 case 200:
4479                 case 196:
4480                 case 192:
4481                         block = "TC";
4482                         break;
4483                 case 112:
4484                 case 48:
4485                         block = "CP";
4486                         break;
4487                 case 49:
4488                 case 177:
4489                 case 50:
4490                 case 178:
4491                         block = "SH";
4492                         break;
4493                 case 53:
4494                 case 190:
4495                         block = "VGT";
4496                         break;
4497                 case 117:
4498                         block = "IH";
4499                         break;
4500                 case 51:
4501                 case 115:
4502                         block = "RLC";
4503                         break;
4504                 case 119:
4505                 case 183:
4506                         block = "DMA0";
4507                         break;
4508                 case 61:
4509                         block = "DMA1";
4510                         break;
4511                 case 248:
4512                 case 120:
4513                         block = "HDP";
4514                         break;
4515                 default:
4516                         block = "unknown";
4517                         break;
4518                 }
4519         } else {
4520                 switch (mc_id) {
4521                 case 32:
4522                 case 16:
4523                 case 96:
4524                 case 80:
4525                 case 160:
4526                 case 144:
4527                 case 224:
4528                 case 208:
4529                         block = "CB";
4530                         break;
4531                 case 33:
4532                 case 17:
4533                 case 97:
4534                 case 81:
4535                 case 161:
4536                 case 145:
4537                 case 225:
4538                 case 209:
4539                         block = "CB_FMASK";
4540                         break;
4541                 case 34:
4542                 case 18:
4543                 case 98:
4544                 case 82:
4545                 case 162:
4546                 case 146:
4547                 case 226:
4548                 case 210:
4549                         block = "CB_CMASK";
4550                         break;
4551                 case 35:
4552                 case 19:
4553                 case 99:
4554                 case 83:
4555                 case 163:
4556                 case 147:
4557                 case 227:
4558                 case 211:
4559                         block = "CB_IMMED";
4560                         break;
4561                 case 36:
4562                 case 20:
4563                 case 100:
4564                 case 84:
4565                 case 164:
4566                 case 148:
4567                 case 228:
4568                 case 212:
4569                         block = "DB";
4570                         break;
4571                 case 37:
4572                 case 21:
4573                 case 101:
4574                 case 85:
4575                 case 165:
4576                 case 149:
4577                 case 229:
4578                 case 213:
4579                         block = "DB_HTILE";
4580                         break;
4581                 case 39:
4582                 case 23:
4583                 case 103:
4584                 case 87:
4585                 case 167:
4586                 case 151:
4587                 case 231:
4588                 case 215:
4589                         block = "DB_STEN";
4590                         break;
4591                 case 72:
4592                 case 68:
4593                 case 8:
4594                 case 4:
4595                 case 136:
4596                 case 132:
4597                 case 200:
4598                 case 196:
4599                         block = "TC";
4600                         break;
4601                 case 112:
4602                 case 48:
4603                         block = "CP";
4604                         break;
4605                 case 49:
4606                 case 177:
4607                 case 50:
4608                 case 178:
4609                         block = "SH";
4610                         break;
4611                 case 53:
4612                         block = "VGT";
4613                         break;
4614                 case 117:
4615                         block = "IH";
4616                         break;
4617                 case 51:
4618                 case 115:
4619                         block = "RLC";
4620                         break;
4621                 case 119:
4622                 case 183:
4623                         block = "DMA0";
4624                         break;
4625                 case 61:
4626                         block = "DMA1";
4627                         break;
4628                 case 248:
4629                 case 120:
4630                         block = "HDP";
4631                         break;
4632                 default:
4633                         block = "unknown";
4634                         break;
4635                 }
4636         }
4637
4638         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4639                protections, vmid, addr,
4640                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4641                block, mc_id);
4642 }
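/* Editor's note, not part of the original file: with hypothetical decoded
 * field values of protections = 0x4, vmid = 3, addr (page) = 4660 and
 * mc_id = 16 (which maps to "CB" in the Tahiti table above) on a read
 * access, the printk above would emit:
 *
 *   VM fault (0x04, vmid 3) at page 4660, read from CB (16)
 */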
4643
4644 /**
4645  * si_vm_set_page - update the page tables using the CP
4646  *
4647  * @rdev: radeon_device pointer
4648  * @ib: indirect buffer to fill with commands
4649  * @pe: addr of the page entry
4650  * @addr: dst addr to write into pe
4651  * @count: number of page entries to update
4652  * @incr: increase next addr by incr bytes
4653  * @flags: access flags
4654  *
4655  * Update the page tables using the CP (SI).
4656  */
4657 void si_vm_set_page(struct radeon_device *rdev,
4658                     struct radeon_ib *ib,
4659                     uint64_t pe,
4660                     uint64_t addr, unsigned count,
4661                     uint32_t incr, uint32_t flags)
4662 {
4663         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4664         uint64_t value;
4665         unsigned ndw;
4666
4667         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4668                 while (count) {
4669                         ndw = 2 + count * 2;
4670                         if (ndw > 0x3FFE)
4671                                 ndw = 0x3FFE;
4672
4673                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4674                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4675                                         WRITE_DATA_DST_SEL(1));
4676                         ib->ptr[ib->length_dw++] = pe;
4677                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4678                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4679                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4680                                         value = radeon_vm_map_gart(rdev, addr);
4681                                         value &= 0xFFFFFFFFFFFFF000ULL;
4682                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4683                                         value = addr;
4684                                 } else {
4685                                         value = 0;
4686                                 }
4687                                 addr += incr;
4688                                 value |= r600_flags;
4689                                 ib->ptr[ib->length_dw++] = value;
4690                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4691                         }
4692                 }
4693         } else {
4694                 /* DMA */
4695                 si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4696         }
4697 }
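/* Editor's note, not part of the original file: a worked example of the ndw
 * sizing above, assuming the usual PACKET3 count convention (count = number
 * of body dwords - 1).  Each WRITE_DATA burst carries one control dword, two
 * address dwords and two dwords per 64-bit PTE, so for count page entries
 * ndw = 2 + 2 * count.  Updating 3 PTEs, for instance, gives ndw = 8, i.e. a
 * 10-dword packet: header, control, pe/upper_pe and three value/upper_value
 * pairs.  The 0x3FFE cap keeps ndw within the 14-bit PACKET3 count field.
 */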
4698
4699 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4700 {
4701         struct radeon_ring *ring = &rdev->ring[ridx];
4702
4703         if (vm == NULL)
4704                 return;
4705
4706         /* write new base address */
4707         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4708         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4709                                  WRITE_DATA_DST_SEL(0)));
4710
4711         if (vm->id < 8) {
4712                 radeon_ring_write(ring,
4713                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4714         } else {
4715                 radeon_ring_write(ring,
4716                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4717         }
4718         radeon_ring_write(ring, 0);
4719         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4720
4721         /* flush hdp cache */
4722         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4723         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4724                                  WRITE_DATA_DST_SEL(0)));
4725         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4726         radeon_ring_write(ring, 0);
4727         radeon_ring_write(ring, 0x1);
4728
4729         /* bits 0-15 are the VM contexts 0-15 */
4730         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4731         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4732                                  WRITE_DATA_DST_SEL(0)));
4733         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4734         radeon_ring_write(ring, 0);
4735         radeon_ring_write(ring, 1 << vm->id);
4736
4737         /* sync PFP to ME, otherwise we might get invalid PFP reads */
4738         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4739         radeon_ring_write(ring, 0x0);
4740 }
4741
4742 /*
4743  *  Power and clock gating
4744  */
4745 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4746 {
4747         int i;
4748
4749         for (i = 0; i < rdev->usec_timeout; i++) {
4750                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4751                         break;
4752                 udelay(1);
4753         }
4754
4755         for (i = 0; i < rdev->usec_timeout; i++) {
4756                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4757                         break;
4758                 udelay(1);
4759         }
4760 }
4761
4762 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4763                                          bool enable)
4764 {
4765         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4766         u32 mask;
4767         int i;
4768
4769         if (enable)
4770                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4771         else
4772                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4773         WREG32(CP_INT_CNTL_RING0, tmp);
4774
4775         if (!enable) {
4776                 /* read a gfx register */
4777                 tmp = RREG32(DB_DEPTH_INFO);
4778
4779                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4780                 for (i = 0; i < rdev->usec_timeout; i++) {
4781                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4782                                 break;
4783                         udelay(1);
4784                 }
4785         }
4786 }
4787
4788 static void si_set_uvd_dcm(struct radeon_device *rdev,
4789                            bool sw_mode)
4790 {
4791         u32 tmp, tmp2;
4792
4793         tmp = RREG32(UVD_CGC_CTRL);
4794         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4795         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4796
4797         if (sw_mode) {
4798                 tmp &= ~0x7ffff800;
4799                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4800         } else {
4801                 tmp |= 0x7ffff800;
4802                 tmp2 = 0;
4803         }
4804
4805         WREG32(UVD_CGC_CTRL, tmp);
4806         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4807 }
4808
4809 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4810 {
4811         bool hw_mode = true;
4812
4813         if (hw_mode) {
4814                 si_set_uvd_dcm(rdev, false);
4815         } else {
4816                 u32 tmp = RREG32(UVD_CGC_CTRL);
4817                 tmp &= ~DCM;
4818                 WREG32(UVD_CGC_CTRL, tmp);
4819         }
4820 }
4821
4822 static u32 si_halt_rlc(struct radeon_device *rdev)
4823 {
4824         u32 data, orig;
4825
4826         orig = data = RREG32(RLC_CNTL);
4827
4828         if (data & RLC_ENABLE) {
4829                 data &= ~RLC_ENABLE;
4830                 WREG32(RLC_CNTL, data);
4831
4832                 si_wait_for_rlc_serdes(rdev);
4833         }
4834
4835         return orig;
4836 }
4837
4838 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4839 {
4840         u32 tmp;
4841
4842         tmp = RREG32(RLC_CNTL);
4843         if (tmp != rlc)
4844                 WREG32(RLC_CNTL, rlc);
4845 }
4846
4847 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4848 {
4849         u32 data, orig;
4850
4851         orig = data = RREG32(DMA_PG);
4852         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4853                 data |= PG_CNTL_ENABLE;
4854         else
4855                 data &= ~PG_CNTL_ENABLE;
4856         if (orig != data)
4857                 WREG32(DMA_PG, data);
4858 }
4859
4860 static void si_init_dma_pg(struct radeon_device *rdev)
4861 {
4862         u32 tmp;
4863
4864         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4865         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4866
4867         for (tmp = 0; tmp < 5; tmp++)
4868                 WREG32(DMA_PGFSM_WRITE, 0);
4869 }
4870
4871 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4872                                bool enable)
4873 {
4874         u32 tmp;
4875
4876         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
4877                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4878                 WREG32(RLC_TTOP_D, tmp);
4879
4880                 tmp = RREG32(RLC_PG_CNTL);
4881                 tmp |= GFX_PG_ENABLE;
4882                 WREG32(RLC_PG_CNTL, tmp);
4883
4884                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4885                 tmp |= AUTO_PG_EN;
4886                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4887         } else {
4888                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4889                 tmp &= ~AUTO_PG_EN;
4890                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4891
4892                 tmp = RREG32(DB_RENDER_CONTROL);
4893         }
4894 }
4895
4896 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4897 {
4898         u32 tmp;
4899
4900         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4901
4902         tmp = RREG32(RLC_PG_CNTL);
4903         tmp |= GFX_PG_SRC;
4904         WREG32(RLC_PG_CNTL, tmp);
4905
4906         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4907
4908         tmp = RREG32(RLC_AUTO_PG_CTRL);
4909
4910         tmp &= ~GRBM_REG_SGIT_MASK;
4911         tmp |= GRBM_REG_SGIT(0x700);
4912         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
4913         WREG32(RLC_AUTO_PG_CTRL, tmp);
4914 }
4915
4916 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4917 {
4918         u32 mask = 0, tmp, tmp1;
4919         int i;
4920
4921         si_select_se_sh(rdev, se, sh);
4922         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4923         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4924         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4925
4926         tmp &= 0xffff0000;
4927
4928         tmp |= tmp1;
4929         tmp >>= 16;
4930
4931         for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
4932                 mask <<= 1;
4933                 mask |= 1;
4934         }
4935
4936         return (~tmp) & mask;
4937 }
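/* Editor's note, not part of the original file: a worked example of the
 * bitmap math above with hypothetical register values.  The upper 16 bits
 * of CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG mark disabled
 * CUs; if their OR, shifted down, is tmp = 0x0003 (CU0 and CU1 fused off)
 * and max_cu_per_sh = 8, then mask = 0xFF and the function returns
 * (~0x0003) & 0xFF = 0xFC, i.e. CUs 2-7 active.
 */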
4938
4939 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4940 {
4941         u32 i, j, k, active_cu_number = 0;
4942         u32 mask, counter, cu_bitmap;
4943         u32 tmp = 0;
4944
4945         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4946                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4947                         mask = 1;
4948                         cu_bitmap = 0;
4949                         counter = 0;
4950                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4951                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4952                                         if (counter < 2)
4953                                                 cu_bitmap |= mask;
4954                                         counter++;
4955                                 }
4956                                 mask <<= 1;
4957                         }
4958
4959                         active_cu_number += counter;
4960                         tmp |= (cu_bitmap << (i * 16 + j * 8));
4961                 }
4962         }
4963
4964         WREG32(RLC_PG_AO_CU_MASK, tmp);
4965
4966         tmp = RREG32(RLC_MAX_PG_CU);
4967         tmp &= ~MAX_PU_CU_MASK;
4968         tmp |= MAX_PU_CU(active_cu_number);
4969         WREG32(RLC_MAX_PG_CU, tmp);
4970 }
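/* Editor's note, not part of the original file: the RLC_PG_AO_CU_MASK
 * packing above reserves 16 bits per shader engine and 8 bits per shader
 * array within it (shift = i * 16 + j * 8), and at most two CUs per array
 * are marked always-on (counter < 2).  For illustration, an always-on
 * bitmap of 0x3 for SE1/SH0 would land at bits 17:16 of the register.
 */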
4971
4972 static void si_enable_cgcg(struct radeon_device *rdev,
4973                            bool enable)
4974 {
4975         u32 data, orig, tmp;
4976
4977         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
4978
4979         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
4980                 si_enable_gui_idle_interrupt(rdev, true);
4981
4982                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
4983
4984                 tmp = si_halt_rlc(rdev);
4985
4986                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
4987                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
4988                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
4989
4990                 si_wait_for_rlc_serdes(rdev);
4991
4992                 si_update_rlc(rdev, tmp);
4993
4994                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
4995
4996                 data |= CGCG_EN | CGLS_EN;
4997         } else {
4998                 si_enable_gui_idle_interrupt(rdev, false);
4999
5000                 RREG32(CB_CGTT_SCLK_CTRL);
5001                 RREG32(CB_CGTT_SCLK_CTRL);
5002                 RREG32(CB_CGTT_SCLK_CTRL);
5003                 RREG32(CB_CGTT_SCLK_CTRL);
5004
5005                 data &= ~(CGCG_EN | CGLS_EN);
5006         }
5007
5008         if (orig != data)
5009                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5010 }
5011
5012 static void si_enable_mgcg(struct radeon_device *rdev,
5013                            bool enable)
5014 {
5015         u32 data, orig, tmp = 0;
5016
5017         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5018                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5019                 data = 0x96940200;
5020                 if (orig != data)
5021                         WREG32(CGTS_SM_CTRL_REG, data);
5022
5023                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5024                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5025                         data |= CP_MEM_LS_EN;
5026                         if (orig != data)
5027                                 WREG32(CP_MEM_SLP_CNTL, data);
5028                 }
5029
5030                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5031                 data &= 0xffffffc0;
5032                 if (orig != data)
5033                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5034
5035                 tmp = si_halt_rlc(rdev);
5036
5037                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5038                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5039                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5040
5041                 si_update_rlc(rdev, tmp);
5042         } else {
5043                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5044                 data |= 0x00000003;
5045                 if (orig != data)
5046                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5047
5048                 data = RREG32(CP_MEM_SLP_CNTL);
5049                 if (data & CP_MEM_LS_EN) {
5050                         data &= ~CP_MEM_LS_EN;
5051                         WREG32(CP_MEM_SLP_CNTL, data);
5052                 }
5053                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5054                 data |= LS_OVERRIDE | OVERRIDE;
5055                 if (orig != data)
5056                         WREG32(CGTS_SM_CTRL_REG, data);
5057
5058                 tmp = si_halt_rlc(rdev);
5059
5060                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5061                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5062                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5063
5064                 si_update_rlc(rdev, tmp);
5065         }
5066 }
5067
5068 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5069                                bool enable)
5070 {
5071         u32 orig, data, tmp;
5072
5073         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5074                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5075                 tmp |= 0x3fff;
5076                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5077
5078                 orig = data = RREG32(UVD_CGC_CTRL);
5079                 data |= DCM;
5080                 if (orig != data)
5081                         WREG32(UVD_CGC_CTRL, data);
5082
5083                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5084                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5085         } else {
5086                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5087                 tmp &= ~0x3fff;
5088                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5089
5090                 orig = data = RREG32(UVD_CGC_CTRL);
5091                 data &= ~DCM;
5092                 if (orig != data)
5093                         WREG32(UVD_CGC_CTRL, data);
5094
5095                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5096                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5097         }
5098 }
5099
5100 static const u32 mc_cg_registers[] =
5101 {
5102         MC_HUB_MISC_HUB_CG,
5103         MC_HUB_MISC_SIP_CG,
5104         MC_HUB_MISC_VM_CG,
5105         MC_XPB_CLK_GAT,
5106         ATC_MISC_CG,
5107         MC_CITF_MISC_WR_CG,
5108         MC_CITF_MISC_RD_CG,
5109         MC_CITF_MISC_VM_CG,
5110         VM_L2_CG,
5111 };
5112
5113 static void si_enable_mc_ls(struct radeon_device *rdev,
5114                             bool enable)
5115 {
5116         int i;
5117         u32 orig, data;
5118
5119         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5120                 orig = data = RREG32(mc_cg_registers[i]);
5121                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5122                         data |= MC_LS_ENABLE;
5123                 else
5124                         data &= ~MC_LS_ENABLE;
5125                 if (data != orig)
5126                         WREG32(mc_cg_registers[i], data);
5127         }
5128 }
5129
5130 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5131                                bool enable)
5132 {
5133         int i;
5134         u32 orig, data;
5135
5136         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5137                 orig = data = RREG32(mc_cg_registers[i]);
5138                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5139                         data |= MC_CG_ENABLE;
5140                 else
5141                         data &= ~MC_CG_ENABLE;
5142                 if (data != orig)
5143                         WREG32(mc_cg_registers[i], data);
5144         }
5145 }
5146
5147 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5148                                bool enable)
5149 {
5150         u32 orig, data, offset;
5151         int i;
5152
5153         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5154                 for (i = 0; i < 2; i++) {
5155                         if (i == 0)
5156                                 offset = DMA0_REGISTER_OFFSET;
5157                         else
5158                                 offset = DMA1_REGISTER_OFFSET;
5159                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5160                         data &= ~MEM_POWER_OVERRIDE;
5161                         if (data != orig)
5162                                 WREG32(DMA_POWER_CNTL + offset, data);
5163                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5164                 }
5165         } else {
5166                 for (i = 0; i < 2; i++) {
5167                         if (i == 0)
5168                                 offset = DMA0_REGISTER_OFFSET;
5169                         else
5170                                 offset = DMA1_REGISTER_OFFSET;
5171                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5172                         data |= MEM_POWER_OVERRIDE;
5173                         if (data != orig)
5174                                 WREG32(DMA_POWER_CNTL + offset, data);
5175
5176                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5177                         data = 0xff000000;
5178                         if (data != orig)
5179                                 WREG32(DMA_CLK_CTRL + offset, data);
5180                 }
5181         }
5182 }
5183
5184 static void si_enable_bif_mgls(struct radeon_device *rdev,
5185                                bool enable)
5186 {
5187         u32 orig, data;
5188
5189         orig = data = RREG32_PCIE(PCIE_CNTL2);
5190
5191         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5192                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5193                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5194         else
5195                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5196                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5197
5198         if (orig != data)
5199                 WREG32_PCIE(PCIE_CNTL2, data);
5200 }
5201
5202 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5203                                bool enable)
5204 {
5205         u32 orig, data;
5206
5207         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5208
5209         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5210                 data &= ~CLOCK_GATING_DIS;
5211         else
5212                 data |= CLOCK_GATING_DIS;
5213
5214         if (orig != data)
5215                 WREG32(HDP_HOST_PATH_CNTL, data);
5216 }
5217
5218 static void si_enable_hdp_ls(struct radeon_device *rdev,
5219                              bool enable)
5220 {
5221         u32 orig, data;
5222
5223         orig = data = RREG32(HDP_MEM_POWER_LS);
5224
5225         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5226                 data |= HDP_LS_ENABLE;
5227         else
5228                 data &= ~HDP_LS_ENABLE;
5229
5230         if (orig != data)
5231                 WREG32(HDP_MEM_POWER_LS, data);
5232 }
5233
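/**
 * si_update_cg - enable/disable clock gating for the requested blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 *
 * Dispatches to the per-block helpers above.  Note that for the GFX
 * block the MGCG/CGCG ordering differs between enable and disable.
 */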
5234 void si_update_cg(struct radeon_device *rdev,
5235                   u32 block, bool enable)
5236 {
5237         if (block & RADEON_CG_BLOCK_GFX) {
5238                 /* order matters! */
5239                 if (enable) {
5240                         si_enable_mgcg(rdev, true);
5241                         si_enable_cgcg(rdev, true);
5242                 } else {
5243                         si_enable_cgcg(rdev, false);
5244                         si_enable_mgcg(rdev, false);
5245                 }
5246         }
5247
5248         if (block & RADEON_CG_BLOCK_MC) {
5249                 si_enable_mc_mgcg(rdev, enable);
5250                 si_enable_mc_ls(rdev, enable);
5251         }
5252
5253         if (block & RADEON_CG_BLOCK_SDMA) {
5254                 si_enable_dma_mgcg(rdev, enable);
5255         }
5256
5257         if (block & RADEON_CG_BLOCK_BIF) {
5258                 si_enable_bif_mgls(rdev, enable);
5259         }
5260
5261         if (block & RADEON_CG_BLOCK_UVD) {
5262                 if (rdev->has_uvd) {
5263                         si_enable_uvd_mgcg(rdev, enable);
5264                 }
5265         }
5266
5267         if (block & RADEON_CG_BLOCK_HDP) {
5268                 si_enable_hdp_mgcg(rdev, enable);
5269                 si_enable_hdp_ls(rdev, enable);
5270         }
5271 }
5272
5273 static void si_init_cg(struct radeon_device *rdev)
5274 {
5275         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5276                             RADEON_CG_BLOCK_MC |
5277                             RADEON_CG_BLOCK_SDMA |
5278                             RADEON_CG_BLOCK_BIF |
5279                             RADEON_CG_BLOCK_HDP), true);
5280         if (rdev->has_uvd) {
5281                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5282                 si_init_uvd_internal_cg(rdev);
5283         }
5284 }
5285
5286 static void si_fini_cg(struct radeon_device *rdev)
5287 {
5288         if (rdev->has_uvd) {
5289                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5290         }
5291         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5292                             RADEON_CG_BLOCK_MC |
5293                             RADEON_CG_BLOCK_SDMA |
5294                             RADEON_CG_BLOCK_BIF |
5295                             RADEON_CG_BLOCK_HDP), false);
5296 }
5297
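/**
 * si_get_csb_size - compute the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Returns the number of dwords needed by si_get_csb_buffer() for the
 * RLC clear state buffer, based on the SECT_CONTEXT extents in
 * rdev->rlc.cs_data, or 0 if there is no cs_data.
 */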
5298 u32 si_get_csb_size(struct radeon_device *rdev)
5299 {
5300         u32 count = 0;
5301         const struct cs_section_def *sect = NULL;
5302         const struct cs_extent_def *ext = NULL;
5303
5304         if (rdev->rlc.cs_data == NULL)
5305                 return 0;
5306
5307         /* begin clear state */
5308         count += 2;
5309         /* context control state */
5310         count += 3;
5311
5312         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5313                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5314                         if (sect->id == SECT_CONTEXT)
5315                                 count += 2 + ext->reg_count;
5316                         else
5317                                 return 0;
5318                 }
5319         }
5320         /* pa_sc_raster_config */
5321         count += 3;
5322         /* end clear state */
5323         count += 2;
5324         /* clear state */
5325         count += 2;
5326
5327         return count;
5328 }
5329
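/**
 * si_get_csb_buffer - fill the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (at least si_get_csb_size() dwords)
 *
 * Emits the PREAMBLE and CONTEXT_CONTROL packets, the SECT_CONTEXT
 * register extents, a per-family PA_SC_RASTER_CONFIG value and the
 * final CLEAR_STATE packet.
 */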
5330 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5331 {
5332         u32 count = 0, i;
5333         const struct cs_section_def *sect = NULL;
5334         const struct cs_extent_def *ext = NULL;
5335
5336         if (rdev->rlc.cs_data == NULL)
5337                 return;
5338         if (buffer == NULL)
5339                 return;
5340
5341         buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5342         buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5343
5344         buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5345         buffer[count++] = 0x80000000;
5346         buffer[count++] = 0x80000000;
5347
5348         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5349                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5350                         if (sect->id == SECT_CONTEXT) {
5351                                 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5352                                 buffer[count++] = ext->reg_index - 0xa000;
5353                                 for (i = 0; i < ext->reg_count; i++)
5354                                         buffer[count++] = ext->extent[i];
5355                         } else {
5356                                 return;
5357                         }
5358                 }
5359         }
5360
5361         buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
5362         buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5363         switch (rdev->family) {
5364         case CHIP_TAHITI:
5365         case CHIP_PITCAIRN:
5366                 buffer[count++] = 0x2a00126a;
5367                 break;
5368         case CHIP_VERDE:
5369                 buffer[count++] = 0x0000124a;
5370                 break;
5371         case CHIP_OLAND:
5372                 buffer[count++] = 0x00000082;
5373                 break;
5374         case CHIP_HAINAN:
5375                 buffer[count++] = 0x00000000;
5376                 break;
5377         default:
5378                 buffer[count++] = 0x00000000;
5379                 break;
5380         }
5381
5382         buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5383         buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5384
5385         buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5386         buffer[count++] = 0;
5387 }
5388
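/**
 * si_init_pg - set up powergating
 *
 * @rdev: radeon_device pointer
 *
 * If powergating is supported, initialize DMA and GFX powergating and
 * the always-on CU mask; otherwise just program the RLC save/restore
 * and clear state buffer addresses.
 */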
5389 static void si_init_pg(struct radeon_device *rdev)
5390 {
5391         if (rdev->pg_flags) {
5392                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5393                         si_init_dma_pg(rdev);
5394                 }
5395                 si_init_ao_cu_mask(rdev);
5396                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5397                         si_init_gfx_cgpg(rdev);
5398                 }
5399                 si_enable_dma_pg(rdev, true);
5400                 si_enable_gfx_cgpg(rdev, true);
5401         } else {
5402                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5403                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5404         }
5405 }
5406
5407 static void si_fini_pg(struct radeon_device *rdev)
5408 {
5409         if (rdev->pg_flags) {
5410                 si_enable_dma_pg(rdev, false);
5411                 si_enable_gfx_cgpg(rdev, false);
5412         }
5413 }
5414
5415 /*
5416  * RLC
5417  */
5418 void si_rlc_reset(struct radeon_device *rdev)
5419 {
5420         u32 tmp = RREG32(GRBM_SOFT_RESET);
5421
5422         tmp |= SOFT_RESET_RLC;
5423         WREG32(GRBM_SOFT_RESET, tmp);
5424         udelay(50);
5425         tmp &= ~SOFT_RESET_RLC;
5426         WREG32(GRBM_SOFT_RESET, tmp);
5427         udelay(50);
5428 }
5429
5430 static void si_rlc_stop(struct radeon_device *rdev)
5431 {
5432         WREG32(RLC_CNTL, 0);
5433
5434         si_enable_gui_idle_interrupt(rdev, false);
5435
5436         si_wait_for_rlc_serdes(rdev);
5437 }
5438
5439 static void si_rlc_start(struct radeon_device *rdev)
5440 {
5441         WREG32(RLC_CNTL, RLC_ENABLE);
5442
5443         si_enable_gui_idle_interrupt(rdev, true);
5444
5445         udelay(50);
5446 }
5447
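/**
 * si_lbpw_supported - check whether RLC load balancing can be used
 *
 * @rdev: radeon_device pointer
 *
 * Checks the memory type field of MC_SEQ_MISC0; load balancing is only
 * enabled on DDR3 boards.
 */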
5448 static bool si_lbpw_supported(struct radeon_device *rdev)
5449 {
5450         u32 tmp;
5451
5452         /* Enable LBPW only for DDR3 */
5453         tmp = RREG32(MC_SEQ_MISC0);
5454         if ((tmp & 0xF0000000) == 0xB0000000)
5455                 return true;
5456         return false;
5457 }
5458
5459 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5460 {
5461         u32 tmp;
5462
5463         tmp = RREG32(RLC_LB_CNTL);
5464         if (enable)
5465                 tmp |= LOAD_BALANCE_ENABLE;
5466         else
5467                 tmp &= ~LOAD_BALANCE_ENABLE;
5468         WREG32(RLC_LB_CNTL, tmp);
5469
5470         if (!enable) {
5471                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5472                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5473         }
5474 }
5475
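/**
 * si_rlc_resume - load the RLC microcode and start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Stops and resets the RLC, initializes power and clock gating, loads
 * the RLC ucode from rdev->rlc_fw and starts the RLC.
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */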
5476 static int si_rlc_resume(struct radeon_device *rdev)
5477 {
5478         u32 i;
5479         const __be32 *fw_data;
5480
5481         if (!rdev->rlc_fw)
5482                 return -EINVAL;
5483
5484         si_rlc_stop(rdev);
5485
5486         si_rlc_reset(rdev);
5487
5488         si_init_pg(rdev);
5489
5490         si_init_cg(rdev);
5491
5492         WREG32(RLC_RL_BASE, 0);
5493         WREG32(RLC_RL_SIZE, 0);
5494         WREG32(RLC_LB_CNTL, 0);
5495         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5496         WREG32(RLC_LB_CNTR_INIT, 0);
5497         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5498
5499         WREG32(RLC_MC_CNTL, 0);
5500         WREG32(RLC_UCODE_CNTL, 0);
5501
5502         fw_data = (const __be32 *)rdev->rlc_fw->data;
5503         for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5504                 WREG32(RLC_UCODE_ADDR, i);
5505                 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5506         }
5507         WREG32(RLC_UCODE_ADDR, 0);
5508
5509         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5510
5511         si_rlc_start(rdev);
5512
5513         return 0;
5514 }
5515
5516 static void si_enable_interrupts(struct radeon_device *rdev)
5517 {
5518         u32 ih_cntl = RREG32(IH_CNTL);
5519         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5520
5521         ih_cntl |= ENABLE_INTR;
5522         ih_rb_cntl |= IH_RB_ENABLE;
5523         WREG32(IH_CNTL, ih_cntl);
5524         WREG32(IH_RB_CNTL, ih_rb_cntl);
5525         rdev->ih.enabled = true;
5526 }
5527
5528 static void si_disable_interrupts(struct radeon_device *rdev)
5529 {
5530         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5531         u32 ih_cntl = RREG32(IH_CNTL);
5532
5533         ih_rb_cntl &= ~IH_RB_ENABLE;
5534         ih_cntl &= ~ENABLE_INTR;
5535         WREG32(IH_RB_CNTL, ih_rb_cntl);
5536         WREG32(IH_CNTL, ih_cntl);
5537         /* set rptr, wptr to 0 */
5538         WREG32(IH_RB_RPTR, 0);
5539         WREG32(IH_RB_WPTR, 0);
5540         rdev->ih.enabled = false;
5541         rdev->ih.rptr = 0;
5542 }
5543
5544 static void si_disable_interrupt_state(struct radeon_device *rdev)
5545 {
5546         u32 tmp;
5547
5548         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5549         WREG32(CP_INT_CNTL_RING1, 0);
5550         WREG32(CP_INT_CNTL_RING2, 0);
5551         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5552         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5553         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5554         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5555         WREG32(GRBM_INT_CNTL, 0);
5556         if (rdev->num_crtc >= 2) {
5557                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5558                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5559         }
5560         if (rdev->num_crtc >= 4) {
5561                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5562                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5563         }
5564         if (rdev->num_crtc >= 6) {
5565                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5566                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5567         }
5568
5569         if (rdev->num_crtc >= 2) {
5570                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5571                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5572         }
5573         if (rdev->num_crtc >= 4) {
5574                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5575                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5576         }
5577         if (rdev->num_crtc >= 6) {
5578                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5579                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5580         }
5581
5582         if (!ASIC_IS_NODCE(rdev)) {
5583                 WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
5584
5585                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5586                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5587                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5588                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5589                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5590                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5591                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5592                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5593                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5594                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5595                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5596                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5597         }
5598 }
5599
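/**
 * si_irq_init - set up the IH ring and enable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, resumes the RLC, programs the IH ring buffer
 * registers (base, size, writeback address) and enables interrupt
 * delivery.  Returns 0 on success, error on failure.
 */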
5600 static int si_irq_init(struct radeon_device *rdev)
5601 {
5602         int ret = 0;
5603         int rb_bufsz;
5604         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5605
5606         /* allocate ring */
5607         ret = r600_ih_ring_alloc(rdev);
5608         if (ret)
5609                 return ret;
5610
5611         /* disable irqs */
5612         si_disable_interrupts(rdev);
5613
5614         /* init rlc */
5615         ret = si_rlc_resume(rdev);
5616         if (ret) {
5617                 r600_ih_ring_fini(rdev);
5618                 return ret;
5619         }
5620
5621         /* setup interrupt control */
5622         /* set dummy read address to ring address */
5623         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5624         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5625         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5626          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5627          */
5628         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5629         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5630         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5631         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5632
5633         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5634         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5635
5636         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5637                       IH_WPTR_OVERFLOW_CLEAR |
5638                       (rb_bufsz << 1));
5639
5640         if (rdev->wb.enabled)
5641                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5642
5643         /* set the writeback address whether it's enabled or not */
5644         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5645         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5646
5647         WREG32(IH_RB_CNTL, ih_rb_cntl);
5648
5649         /* set rptr, wptr to 0 */
5650         WREG32(IH_RB_RPTR, 0);
5651         WREG32(IH_RB_WPTR, 0);
5652
5653         /* Default settings for IH_CNTL (disabled at first) */
5654         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5655         /* RPTR_REARM only works if msi's are enabled */
5656         if (rdev->msi_enabled)
5657                 ih_cntl |= RPTR_REARM;
5658         WREG32(IH_CNTL, ih_cntl);
5659
5660         /* force the active interrupt state to all disabled */
5661         si_disable_interrupt_state(rdev);
5662
5663         pci_set_master(rdev->pdev);
5664
5665         /* enable irqs */
5666         si_enable_interrupts(rdev);
5667
5668         return ret;
5669 }
5670
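/**
 * si_irq_set - program the interrupt mask registers
 *
 * @rdev: radeon_device pointer
 *
 * Builds the CP, DMA, vblank, hotplug and thermal interrupt enables
 * from the current rdev->irq state and writes them to the hardware.
 * Returns 0 on success, -EINVAL if no interrupt handler is installed.
 */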
5671 int si_irq_set(struct radeon_device *rdev)
5672 {
5673         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5674         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5675         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5676         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5677         u32 grbm_int_cntl = 0;
5678         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5679         u32 dma_cntl, dma_cntl1;
5680         u32 thermal_int = 0;
5681
5682         if (!rdev->irq.installed) {
5683                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5684                 return -EINVAL;
5685         }
5686         /* don't enable anything if the ih is disabled */
5687         if (!rdev->ih.enabled) {
5688                 si_disable_interrupts(rdev);
5689                 /* force the active interrupt state to all disabled */
5690                 si_disable_interrupt_state(rdev);
5691                 return 0;
5692         }
5693
5694         if (!ASIC_IS_NODCE(rdev)) {
5695                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5696                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5697                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5698                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5699                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5700                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5701         }
5702
5703         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5704         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5705
5706         thermal_int = RREG32(CG_THERMAL_INT) &
5707                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5708
5709         /* enable CP interrupts on all rings */
5710         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5711                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5712                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5713         }
5714         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5715                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5716                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5717         }
5718         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5719                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5720                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5721         }
5722         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5723                 DRM_DEBUG("si_irq_set: sw int dma\n");
5724                 dma_cntl |= TRAP_ENABLE;
5725         }
5726
5727         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5728                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5729                 dma_cntl1 |= TRAP_ENABLE;
5730         }
5731         if (rdev->irq.crtc_vblank_int[0] ||
5732             atomic_read(&rdev->irq.pflip[0])) {
5733                 DRM_DEBUG("si_irq_set: vblank 0\n");
5734                 crtc1 |= VBLANK_INT_MASK;
5735         }
5736         if (rdev->irq.crtc_vblank_int[1] ||
5737             atomic_read(&rdev->irq.pflip[1])) {
5738                 DRM_DEBUG("si_irq_set: vblank 1\n");
5739                 crtc2 |= VBLANK_INT_MASK;
5740         }
5741         if (rdev->irq.crtc_vblank_int[2] ||
5742             atomic_read(&rdev->irq.pflip[2])) {
5743                 DRM_DEBUG("si_irq_set: vblank 2\n");
5744                 crtc3 |= VBLANK_INT_MASK;
5745         }
5746         if (rdev->irq.crtc_vblank_int[3] ||
5747             atomic_read(&rdev->irq.pflip[3])) {
5748                 DRM_DEBUG("si_irq_set: vblank 3\n");
5749                 crtc4 |= VBLANK_INT_MASK;
5750         }
5751         if (rdev->irq.crtc_vblank_int[4] ||
5752             atomic_read(&rdev->irq.pflip[4])) {
5753                 DRM_DEBUG("si_irq_set: vblank 4\n");
5754                 crtc5 |= VBLANK_INT_MASK;
5755         }
5756         if (rdev->irq.crtc_vblank_int[5] ||
5757             atomic_read(&rdev->irq.pflip[5])) {
5758                 DRM_DEBUG("si_irq_set: vblank 5\n");
5759                 crtc6 |= VBLANK_INT_MASK;
5760         }
5761         if (rdev->irq.hpd[0]) {
5762                 DRM_DEBUG("si_irq_set: hpd 1\n");
5763                 hpd1 |= DC_HPDx_INT_EN;
5764         }
5765         if (rdev->irq.hpd[1]) {
5766                 DRM_DEBUG("si_irq_set: hpd 2\n");
5767                 hpd2 |= DC_HPDx_INT_EN;
5768         }
5769         if (rdev->irq.hpd[2]) {
5770                 DRM_DEBUG("si_irq_set: hpd 3\n");
5771                 hpd3 |= DC_HPDx_INT_EN;
5772         }
5773         if (rdev->irq.hpd[3]) {
5774                 DRM_DEBUG("si_irq_set: hpd 4\n");
5775                 hpd4 |= DC_HPDx_INT_EN;
5776         }
5777         if (rdev->irq.hpd[4]) {
5778                 DRM_DEBUG("si_irq_set: hpd 5\n");
5779                 hpd5 |= DC_HPDx_INT_EN;
5780         }
5781         if (rdev->irq.hpd[5]) {
5782                 DRM_DEBUG("si_irq_set: hpd 6\n");
5783                 hpd6 |= DC_HPDx_INT_EN;
5784         }
5785
5786         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5787         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5788         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5789
5790         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5791         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5792
5793         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5794
5795         if (rdev->irq.dpm_thermal) {
5796                 DRM_DEBUG("dpm thermal\n");
5797                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5798         }
5799
5800         if (rdev->num_crtc >= 2) {
5801                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5802                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5803         }
5804         if (rdev->num_crtc >= 4) {
5805                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5806                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5807         }
5808         if (rdev->num_crtc >= 6) {
5809                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5810                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5811         }
5812
5813         if (rdev->num_crtc >= 2) {
5814                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5815                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5816         }
5817         if (rdev->num_crtc >= 4) {
5818                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5819                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5820         }
5821         if (rdev->num_crtc >= 6) {
5822                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5823                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5824         }
5825
5826         if (!ASIC_IS_NODCE(rdev)) {
5827                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5828                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5829                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5830                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5831                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5832                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5833         }
5834
5835         WREG32(CG_THERMAL_INT, thermal_int);
5836
5837         return 0;
5838 }
5839
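/**
 * si_irq_ack - ack the display interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Latches the display interrupt status registers into
 * rdev->irq.stat_regs and acknowledges any pending pageflip, vblank,
 * vline and hotplug interrupts.
 */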
5840 static inline void si_irq_ack(struct radeon_device *rdev)
5841 {
5842         u32 tmp;
5843
5844         if (ASIC_IS_NODCE(rdev))
5845                 return;
5846
5847         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5848         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5849         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5850         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5851         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5852         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5853         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5854         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5855         if (rdev->num_crtc >= 4) {
5856                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5857                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5858         }
5859         if (rdev->num_crtc >= 6) {
5860                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5861                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5862         }
5863
5864         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5865                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5866         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5867                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5868         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5869                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5870         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5871                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5872         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5873                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5874         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5875                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5876
5877         if (rdev->num_crtc >= 4) {
5878                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5879                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5880                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5881                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5882                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5883                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5884                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5885                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5886                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5887                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5888                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5889                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5890         }
5891
5892         if (rdev->num_crtc >= 6) {
5893                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5894                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5895                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5896                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5897                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5898                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5899                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5900                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5901                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5902                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5903                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5904                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5905         }
5906
5907         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5908                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5909                 tmp |= DC_HPDx_INT_ACK;
5910                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5911         }
5912         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5913                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5914                 tmp |= DC_HPDx_INT_ACK;
5915                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5916         }
5917         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5918                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5919                 tmp |= DC_HPDx_INT_ACK;
5920                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5921         }
5922         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5923                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5924                 tmp |= DC_HPDx_INT_ACK;
5925                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5926         }
5927         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5928                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5929                 tmp |= DC_HPDx_INT_ACK;
5930                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5931         }
5932         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5933                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5934                 tmp |= DC_HPDx_INT_ACK;
5935                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5936         }
5937 }
5938
5939 static void si_irq_disable(struct radeon_device *rdev)
5940 {
5941         si_disable_interrupts(rdev);
5942         /* Wait and acknowledge irq */
5943         mdelay(1);
5944         si_irq_ack(rdev);
5945         si_disable_interrupt_state(rdev);
5946 }
5947
5948 static void si_irq_suspend(struct radeon_device *rdev)
5949 {
5950         si_irq_disable(rdev);
5951         si_rlc_stop(rdev);
5952 }
5953
5954 static void si_irq_fini(struct radeon_device *rdev)
5955 {
5956         si_irq_suspend(rdev);
5957         r600_ih_ring_fini(rdev);
5958 }
5959
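/**
 * si_get_ih_wptr - get the current IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Reads the wptr from the writeback page if enabled, otherwise from
 * the IH_RB_WPTR register, and handles ring buffer overflow by
 * advancing the rptr past the oldest overwritten entry.
 * Returns the wptr masked to the ring size.
 */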
5960 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5961 {
5962         u32 wptr, tmp;
5963
5964         if (rdev->wb.enabled)
5965                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5966         else
5967                 wptr = RREG32(IH_RB_WPTR);
5968
5969         if (wptr & RB_OVERFLOW) {
5970                 /* When a ring buffer overflow happens, start parsing interrupts
5971                  * from the last non-overwritten vector (wptr + 16). Hopefully
5972                  * this should allow us to catch up.
5973                  */
5974                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5975                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5976                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5977                 tmp = RREG32(IH_RB_CNTL);
5978                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5979                 WREG32(IH_RB_CNTL, tmp);
5980         }
5981         return (wptr & rdev->ih.ptr_mask);
5982 }
5983
5984 /*        SI IV Ring
5985  * Each IV ring entry is 128 bits:
5986  * [7:0]    - interrupt source id
5987  * [31:8]   - reserved
5988  * [59:32]  - interrupt source data
5989  * [63:60]  - reserved
5990  * [71:64]  - RINGID
5991  * [79:72]  - VMID
5992  * [127:80] - reserved
5993  */
5994 int si_irq_process(struct radeon_device *rdev)
5995 {
5996         u32 wptr;
5997         u32 rptr;
5998         u32 src_id, src_data, ring_id;
5999         u32 ring_index;
6000         bool queue_hotplug = false;
6001         bool queue_thermal = false;
6002         u32 status, addr;
6003
6004         if (!rdev->ih.enabled || rdev->shutdown)
6005                 return IRQ_NONE;
6006
6007         wptr = si_get_ih_wptr(rdev);
6008
6009 restart_ih:
6010         /* is somebody else already processing irqs? */
6011         if (atomic_xchg(&rdev->ih.lock, 1))
6012                 return IRQ_NONE;
6013
6014         rptr = rdev->ih.rptr;
6015         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6016
6017         /* Order reading of wptr vs. reading of IH ring data */
6018         rmb();
6019
6020         /* display interrupts */
6021         si_irq_ack(rdev);
6022
6023         while (rptr != wptr) {
6024                 /* wptr/rptr are in bytes! */
6025                 ring_index = rptr / 4;
6026                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6027                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6028                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6029
6030                 switch (src_id) {
6031                 case 1: /* D1 vblank/vline */
6032                         switch (src_data) {
6033                         case 0: /* D1 vblank */
6034                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6035                                         if (rdev->irq.crtc_vblank_int[0]) {
6036                                                 drm_handle_vblank(rdev->ddev, 0);
6037                                                 rdev->pm.vblank_sync = true;
6038                                                 wake_up(&rdev->irq.vblank_queue);
6039                                         }
6040                                         if (atomic_read(&rdev->irq.pflip[0]))
6041                                                 radeon_crtc_handle_flip(rdev, 0);
6042                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6043                                         DRM_DEBUG("IH: D1 vblank\n");
6044                                 }
6045                                 break;
6046                         case 1: /* D1 vline */
6047                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6048                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6049                                         DRM_DEBUG("IH: D1 vline\n");
6050                                 }
6051                                 break;
6052                         default:
6053                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6054                                 break;
6055                         }
6056                         break;
6057                 case 2: /* D2 vblank/vline */
6058                         switch (src_data) {
6059                         case 0: /* D2 vblank */
6060                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6061                                         if (rdev->irq.crtc_vblank_int[1]) {
6062                                                 drm_handle_vblank(rdev->ddev, 1);
6063                                                 rdev->pm.vblank_sync = true;
6064                                                 wake_up(&rdev->irq.vblank_queue);
6065                                         }
6066                                         if (atomic_read(&rdev->irq.pflip[1]))
6067                                                 radeon_crtc_handle_flip(rdev, 1);
6068                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6069                                         DRM_DEBUG("IH: D2 vblank\n");
6070                                 }
6071                                 break;
6072                         case 1: /* D2 vline */
6073                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6074                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6075                                         DRM_DEBUG("IH: D2 vline\n");
6076                                 }
6077                                 break;
6078                         default:
6079                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6080                                 break;
6081                         }
6082                         break;
6083                 case 3: /* D3 vblank/vline */
6084                         switch (src_data) {
6085                         case 0: /* D3 vblank */
6086                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6087                                         if (rdev->irq.crtc_vblank_int[2]) {
6088                                                 drm_handle_vblank(rdev->ddev, 2);
6089                                                 rdev->pm.vblank_sync = true;
6090                                                 wake_up(&rdev->irq.vblank_queue);
6091                                         }
6092                                         if (atomic_read(&rdev->irq.pflip[2]))
6093                                                 radeon_crtc_handle_flip(rdev, 2);
6094                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6095                                         DRM_DEBUG("IH: D3 vblank\n");
6096                                 }
6097                                 break;
6098                         case 1: /* D3 vline */
6099                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6100                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6101                                         DRM_DEBUG("IH: D3 vline\n");
6102                                 }
6103                                 break;
6104                         default:
6105                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6106                                 break;
6107                         }
6108                         break;
6109                 case 4: /* D4 vblank/vline */
6110                         switch (src_data) {
6111                         case 0: /* D4 vblank */
6112                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6113                                         if (rdev->irq.crtc_vblank_int[3]) {
6114                                                 drm_handle_vblank(rdev->ddev, 3);
6115                                                 rdev->pm.vblank_sync = true;
6116                                                 wake_up(&rdev->irq.vblank_queue);
6117                                         }
6118                                         if (atomic_read(&rdev->irq.pflip[3]))
6119                                                 radeon_crtc_handle_flip(rdev, 3);
6120                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6121                                         DRM_DEBUG("IH: D4 vblank\n");
6122                                 }
6123                                 break;
6124                         case 1: /* D4 vline */
6125                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6126                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6127                                         DRM_DEBUG("IH: D4 vline\n");
6128                                 }
6129                                 break;
6130                         default:
6131                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6132                                 break;
6133                         }
6134                         break;
6135                 case 5: /* D5 vblank/vline */
6136                         switch (src_data) {
6137                         case 0: /* D5 vblank */
6138                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6139                                         if (rdev->irq.crtc_vblank_int[4]) {
6140                                                 drm_handle_vblank(rdev->ddev, 4);
6141                                                 rdev->pm.vblank_sync = true;
6142                                                 wake_up(&rdev->irq.vblank_queue);
6143                                         }
6144                                         if (atomic_read(&rdev->irq.pflip[4]))
6145                                                 radeon_crtc_handle_flip(rdev, 4);
6146                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6147                                         DRM_DEBUG("IH: D5 vblank\n");
6148                                 }
6149                                 break;
6150                         case 1: /* D5 vline */
6151                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6152                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6153                                         DRM_DEBUG("IH: D5 vline\n");
6154                                 }
6155                                 break;
6156                         default:
6157                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6158                                 break;
6159                         }
6160                         break;
6161                 case 6: /* D6 vblank/vline */
6162                         switch (src_data) {
6163                         case 0: /* D6 vblank */
6164                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6165                                         if (rdev->irq.crtc_vblank_int[5]) {
6166                                                 drm_handle_vblank(rdev->ddev, 5);
6167                                                 rdev->pm.vblank_sync = true;
6168                                                 wake_up(&rdev->irq.vblank_queue);
6169                                         }
6170                                         if (atomic_read(&rdev->irq.pflip[5]))
6171                                                 radeon_crtc_handle_flip(rdev, 5);
6172                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6173                                         DRM_DEBUG("IH: D6 vblank\n");
6174                                 }
6175                                 break;
6176                         case 1: /* D6 vline */
6177                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6178                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6179                                         DRM_DEBUG("IH: D6 vline\n");
6180                                 }
6181                                 break;
6182                         default:
6183                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6184                                 break;
6185                         }
6186                         break;
6187                 case 42: /* HPD hotplug */
6188                         switch (src_data) {
6189                         case 0:
6190                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6191                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6192                                         queue_hotplug = true;
6193                                         DRM_DEBUG("IH: HPD1\n");
6194                                 }
6195                                 break;
6196                         case 1:
6197                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6198                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6199                                         queue_hotplug = true;
6200                                         DRM_DEBUG("IH: HPD2\n");
6201                                 }
6202                                 break;
6203                         case 2:
6204                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6205                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6206                                         queue_hotplug = true;
6207                                         DRM_DEBUG("IH: HPD3\n");
6208                                 }
6209                                 break;
6210                         case 3:
6211                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6212                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6213                                         queue_hotplug = true;
6214                                         DRM_DEBUG("IH: HPD4\n");
6215                                 }
6216                                 break;
6217                         case 4:
6218                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6219                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6220                                         queue_hotplug = true;
6221                                         DRM_DEBUG("IH: HPD5\n");
6222                                 }
6223                                 break;
6224                         case 5:
6225                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6226                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6227                                         queue_hotplug = true;
6228                                         DRM_DEBUG("IH: HPD6\n");
6229                                 }
6230                                 break;
6231                         default:
6232                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6233                                 break;
6234                         }
6235                         break;
6236                 case 146:
6237                 case 147:
6238                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6239                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6240                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6241                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6242                                 addr);
6243                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6244                                 status);
6245                         si_vm_decode_fault(rdev, status, addr);
6246                         /* reset addr and status */
6247                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6248                         break;
6249                 case 176: /* RINGID0 CP_INT */
6250                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6251                         break;
6252                 case 177: /* RINGID1 CP_INT */
6253                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6254                         break;
6255                 case 178: /* RINGID2 CP_INT */
6256                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6257                         break;
6258                 case 181: /* CP EOP event */
6259                         DRM_DEBUG("IH: CP EOP\n");
6260                         switch (ring_id) {
6261                         case 0:
6262                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6263                                 break;
6264                         case 1:
6265                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6266                                 break;
6267                         case 2:
6268                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6269                                 break;
6270                         }
6271                         break;
6272                 case 224: /* DMA trap event */
6273                         DRM_DEBUG("IH: DMA trap\n");
6274                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6275                         break;
6276                 case 230: /* thermal low to high */
6277                         DRM_DEBUG("IH: thermal low to high\n");
6278                         rdev->pm.dpm.thermal.high_to_low = false;
6279                         queue_thermal = true;
6280                         break;
6281                 case 231: /* thermal high to low */
6282                         DRM_DEBUG("IH: thermal high to low\n");
6283                         rdev->pm.dpm.thermal.high_to_low = true;
6284                         queue_thermal = true;
6285                         break;
6286                 case 233: /* GUI IDLE */
6287                         DRM_DEBUG("IH: GUI idle\n");
6288                         break;
6289                 case 244: /* DMA1 trap event */
6290                         DRM_DEBUG("IH: DMA1 trap\n");
6291                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6292                         break;
6293                 default:
6294                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6295                         break;
6296                 }
6297
6298                 /* wptr/rptr are in bytes! */
6299                 rptr += 16;
6300                 rptr &= rdev->ih.ptr_mask;
6301         }
6302         if (queue_hotplug)
6303                 schedule_work(&rdev->hotplug_work);
6304         if (queue_thermal && rdev->pm.dpm_enabled)
6305                 schedule_work(&rdev->pm.dpm.thermal.work);
6306         rdev->ih.rptr = rptr;
6307         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6308         atomic_set(&rdev->ih.lock, 0);
6309
6310         /* make sure wptr hasn't changed while processing */
6311         wptr = si_get_ih_wptr(rdev);
6312         if (wptr != rptr)
6313                 goto restart_ih;
6314
6315         return IRQ_HANDLED;
6316 }
6317
6318 /*
6319  * startup/shutdown callbacks
6320  */
6321 static int si_startup(struct radeon_device *rdev)
6322 {
6323         struct radeon_ring *ring;
6324         int r;
6325
6326         /* enable pcie gen2/3 link */
6327         si_pcie_gen3_enable(rdev);
6328         /* enable aspm */
6329         si_program_aspm(rdev);
6330
6331         si_mc_program(rdev);
6332
6333         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6334             !rdev->rlc_fw || !rdev->mc_fw) {
6335                 r = si_init_microcode(rdev);
6336                 if (r) {
6337                         DRM_ERROR("Failed to load firmware!\n");
6338                         return r;
6339                 }
6340         }
6341
6342         r = si_mc_load_microcode(rdev);
6343         if (r) {
6344                 DRM_ERROR("Failed to load MC firmware!\n");
6345                 return r;
6346         }
6347
6348         r = r600_vram_scratch_init(rdev);
6349         if (r)
6350                 return r;
6351
6352         r = si_pcie_gart_enable(rdev);
6353         if (r)
6354                 return r;
6355         si_gpu_init(rdev);
6356
6357         /* allocate rlc buffers */
6358         if (rdev->family == CHIP_VERDE) {
6359                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6360                 rdev->rlc.reg_list_size =
6361                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6362         }
6363         rdev->rlc.cs_data = si_cs_data;
6364         r = sumo_rlc_init(rdev);
6365         if (r) {
6366                 DRM_ERROR("Failed to init rlc BOs!\n");
6367                 return r;
6368         }
6369
6370         /* allocate wb buffer */
6371         r = radeon_wb_init(rdev);
6372         if (r)
6373                 return r;
6374
6375         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6376         if (r) {
6377                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6378                 return r;
6379         }
6380
6381         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6382         if (r) {
6383                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6384                 return r;
6385         }
6386
6387         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6388         if (r) {
6389                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6390                 return r;
6391         }
6392
6393         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6394         if (r) {
6395                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6396                 return r;
6397         }
6398
6399         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6400         if (r) {
6401                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6402                 return r;
6403         }
6404
6405         if (rdev->has_uvd) {
6406                 r = uvd_v2_2_resume(rdev);
6407                 if (!r) {
6408                         r = radeon_fence_driver_start_ring(rdev,
6409                                                            R600_RING_TYPE_UVD_INDEX);
6410                         if (r)
6411                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6412                 }
6413                 if (r)
6414                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6415         }
6416
6417         /* Enable IRQ */
6418         if (!rdev->irq.installed) {
6419                 r = radeon_irq_kms_init(rdev);
6420                 if (r)
6421                         return r;
6422         }
6423
6424         r = si_irq_init(rdev);
6425         if (r) {
6426                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6427                 radeon_irq_kms_fini(rdev);
6428                 return r;
6429         }
6430         si_irq_set(rdev);
6431
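        /* bring up the rings: the three CP rings and the two DMA engines
         * each get a write-back rptr offset and their own rptr/wptr
         * registers
         */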
6432         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6433         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6434                              CP_RB0_RPTR, CP_RB0_WPTR,
6435                              RADEON_CP_PACKET2);
6436         if (r)
6437                 return r;
6438
6439         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6440         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6441                              CP_RB1_RPTR, CP_RB1_WPTR,
6442                              RADEON_CP_PACKET2);
6443         if (r)
6444                 return r;
6445
6446         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6447         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6448                              CP_RB2_RPTR, CP_RB2_WPTR,
6449                              RADEON_CP_PACKET2);
6450         if (r)
6451                 return r;
6452
6453         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6454         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6455                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
6456                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
6457                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6458         if (r)
6459                 return r;
6460
6461         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6462         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6463                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
6464                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
6465                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6466         if (r)
6467                 return r;
6468
6469         r = si_cp_load_microcode(rdev);
6470         if (r)
6471                 return r;
6472         r = si_cp_resume(rdev);
6473         if (r)
6474                 return r;
6475
6476         r = cayman_dma_resume(rdev);
6477         if (r)
6478                 return r;
6479
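        /* the UVD ring is only brought up if uvd_v2_2_resume() succeeded
         * above and left a non-zero ring size
         */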
6480         if (rdev->has_uvd) {
6481                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6482                 if (ring->ring_size) {
6483                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6484                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6485                                              RADEON_CP_PACKET2);
6486                         if (!r)
6487                                 r = uvd_v1_0_init(rdev);
6488                         if (r)
6489                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6490                 }
6491         }
6492
6493         r = radeon_ib_pool_init(rdev);
6494         if (r) {
6495                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6496                 return r;
6497         }
6498
6499         r = radeon_vm_manager_init(rdev);
6500         if (r) {
6501                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6502                 return r;
6503         }
6504
6505         r = dce6_audio_init(rdev);
6506         if (r)
6507                 return r;
6508
6509         return 0;
6510 }
6511
6512 int si_resume(struct radeon_device *rdev)
6513 {
6514         int r;
6515
6516         /* Do not reset the GPU before posting; unlike on r500 hardware,
6517          * posting here performs the tasks necessary to bring the GPU
6518          * back into good shape.
6519          */
6520         /* post card */
6521         atom_asic_init(rdev->mode_info.atom_context);
6522
6523         /* init golden registers */
6524         si_init_golden_registers(rdev);
6525
6526         rdev->accel_working = true;
6527         r = si_startup(rdev);
6528         if (r) {
6529                 DRM_ERROR("si startup failed on resume\n");
6530                 rdev->accel_working = false;
6531                 return r;
6532         }
6533
6534         return r;
6535
6536 }
6537
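/* bring the hardware down again: stop the engines, tear down UVD,
 * disable power/clock gating, suspend IRQs, and disable write-back
 * and the GART
 */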
6538 int si_suspend(struct radeon_device *rdev)
6539 {
6540         dce6_audio_fini(rdev);
6541         radeon_vm_manager_fini(rdev);
6542         si_cp_enable(rdev, false);
6543         cayman_dma_stop(rdev);
6544         if (rdev->has_uvd) {
6545                 uvd_v1_0_fini(rdev);
6546                 radeon_uvd_suspend(rdev);
6547         }
6548         si_fini_pg(rdev);
6549         si_fini_cg(rdev);
6550         si_irq_suspend(rdev);
6551         radeon_wb_disable(rdev);
6552         si_pcie_gart_disable(rdev);
6553         return 0;
6554 }
6555
6556 /* The plan is to move initialization into this function and use
6557  * helper functions so that radeon_device_init does little more
6558  * than call ASIC-specific functions. This should also allow
6559  * removing a bunch of callback functions, such as
6560  * vram_info.
6561  */
6562 int si_init(struct radeon_device *rdev)
6563 {
6564         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6565         int r;
6566
6567         /* Read BIOS */
6568         if (!radeon_get_bios(rdev)) {
6569                 if (ASIC_IS_AVIVO(rdev))
6570                         return -EINVAL;
6571         }
6572         /* Must be an ATOMBIOS */
6573         if (!rdev->is_atom_bios) {
6574                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6575                 return -EINVAL;
6576         }
6577         r = radeon_atombios_init(rdev);
6578         if (r)
6579                 return r;
6580
6581         /* Post card if necessary */
6582         if (!radeon_card_posted(rdev)) {
6583                 if (!rdev->bios) {
6584                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6585                         return -EINVAL;
6586                 }
6587                 DRM_INFO("GPU not posted. posting now...\n");
6588                 atom_asic_init(rdev->mode_info.atom_context);
6589         }
6590         /* init golden registers */
6591         si_init_golden_registers(rdev);
6592         /* Initialize scratch registers */
6593         si_scratch_init(rdev);
6594         /* Initialize surface registers */
6595         radeon_surface_init(rdev);
6596         /* Initialize clocks */
6597         radeon_get_clock_info(rdev->ddev);
6598
6599         /* Fence driver */
6600         r = radeon_fence_driver_init(rdev);
6601         if (r)
6602                 return r;
6603
6604         /* initialize memory controller */
6605         r = si_mc_init(rdev);
6606         if (r)
6607                 return r;
6608         /* Memory manager */
6609         r = radeon_bo_init(rdev);
6610         if (r)
6611                 return r;
6612
6613         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6614         ring->ring_obj = NULL;
6615         r600_ring_init(rdev, ring, 1024 * 1024);
6616
6617         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6618         ring->ring_obj = NULL;
6619         r600_ring_init(rdev, ring, 1024 * 1024);
6620
6621         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6622         ring->ring_obj = NULL;
6623         r600_ring_init(rdev, ring, 1024 * 1024);
6624
6625         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6626         ring->ring_obj = NULL;
6627         r600_ring_init(rdev, ring, 64 * 1024);
6628
6629         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6630         ring->ring_obj = NULL;
6631         r600_ring_init(rdev, ring, 64 * 1024);
6632
6633         if (rdev->has_uvd) {
6634                 r = radeon_uvd_init(rdev);
6635                 if (!r) {
6636                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6637                         ring->ring_obj = NULL;
6638                         r600_ring_init(rdev, ring, 4096);
6639                 }
6640         }
6641
6642         rdev->ih.ring_obj = NULL;
6643         r600_ih_ring_init(rdev, 64 * 1024);
6644
6645         r = r600_pcie_gart_init(rdev);
6646         if (r)
6647                 return r;
6648
6649         rdev->accel_working = true;
6650         r = si_startup(rdev);
6651         if (r) {
6652                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6653                 si_cp_fini(rdev);
6654                 cayman_dma_fini(rdev);
6655                 si_irq_fini(rdev);
6656                 sumo_rlc_fini(rdev);
6657                 radeon_wb_fini(rdev);
6658                 radeon_ib_pool_fini(rdev);
6659                 radeon_vm_manager_fini(rdev);
6660                 radeon_irq_kms_fini(rdev);
6661                 si_pcie_gart_fini(rdev);
6662                 rdev->accel_working = false;
6663         }
6664
6665         /* Don't start up if the MC ucode is missing.
6666          * The default clocks and voltages before the MC ucode
6667          * is loaded are not sufficient for advanced operations.
6668          */
6669         if (!rdev->mc_fw) {
6670                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6671                 return -EINVAL;
6672         }
6673
6674         return 0;
6675 }
6676
6677 void si_fini(struct radeon_device *rdev)
6678 {
6679         si_cp_fini(rdev);
6680         cayman_dma_fini(rdev);
6681         si_fini_pg(rdev);
6682         si_fini_cg(rdev);
6683         si_irq_fini(rdev);
6684         sumo_rlc_fini(rdev);
6685         radeon_wb_fini(rdev);
6686         radeon_vm_manager_fini(rdev);
6687         radeon_ib_pool_fini(rdev);
6688         radeon_irq_kms_fini(rdev);
6689         if (rdev->has_uvd) {
6690                 uvd_v1_0_fini(rdev);
6691                 radeon_uvd_fini(rdev);
6692         }
6693         si_pcie_gart_fini(rdev);
6694         r600_vram_scratch_fini(rdev);
6695         radeon_gem_fini(rdev);
6696         radeon_fence_driver_fini(rdev);
6697         radeon_bo_fini(rdev);
6698         radeon_atombios_fini(rdev);
6699         kfree(rdev->bios);
6700         rdev->bios = NULL;
6701 }
6702
6703 /**
6704  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6705  *
6706  * @rdev: radeon_device pointer
6707  *
6708  * Fetches a GPU clock counter snapshot (SI).
6709  * Returns the 64 bit clock counter snapshot.
6710  */
6711 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6712 {
6713         uint64_t clock;
6714
6715         mutex_lock(&rdev->gpu_clock_mutex);
6716         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6717         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6718                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6719         mutex_unlock(&rdev->gpu_clock_mutex);
6720         return clock;
6721 }
6722
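/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD vclk
 * @dclk: requested UVD dclk
 *
 * Bypasses vclk and dclk to bclk, computes the UPLL dividers, reprograms
 * the PLL and then switches vclk/dclk back to the PLL outputs (SI).
 * Returns 0 on success, error code on failure.
 */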
6723 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6724 {
6725         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6726         int r;
6727
6728         /* bypass vclk and dclk with bclk */
6729         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6730                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6731                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6732
6733         /* put PLL in bypass mode */
6734         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6735
6736         if (!vclk || !dclk) {
6737                 /* keep the Bypass mode, put PLL to sleep */
6738                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6739                 return 0;
6740         }
6741
6742         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6743                                           16384, 0x03FFFFFF, 0, 128, 5,
6744                                           &fb_div, &vclk_div, &dclk_div);
6745         if (r)
6746                 return r;
6747
6748         /* set RESET_ANTI_MUX to 0 */
6749         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6750
6751         /* set VCO_MODE to 1 */
6752         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6753
6754         /* toggle UPLL_SLEEP to 1 then back to 0 */
6755         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6756         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6757
6758         /* deassert UPLL_RESET */
6759         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6760
6761         mdelay(1);
6762
6763         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6764         if (r)
6765                 return r;
6766
6767         /* assert UPLL_RESET again */
6768         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6769
6770         /* disable spread spectrum. */
6771         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6772
6773         /* set feedback divider */
6774         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6775
6776         /* set ref divider to 0 */
6777         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6778
6779         if (fb_div < 307200)
6780                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6781         else
6782                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6783
6784         /* set PDIV_A and PDIV_B */
6785         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6786                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6787                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6788
6789         /* give the PLL some time to settle */
6790         mdelay(15);
6791
6792         /* deassert PLL_RESET */
6793         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6794
6795         mdelay(15);
6796
6797         /* switch from bypass mode to normal mode */
6798         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6799
6800         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6801         if (r)
6802                 return r;
6803
6804         /* switch VCLK and DCLK selection */
6805         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6806                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6807                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6808
6809         mdelay(100);
6810
6811         return 0;
6812 }
6813
6814 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6815 {
6816         struct pci_dev *root = rdev->pdev->bus->self;
6817         int bridge_pos, gpu_pos;
6818         u32 speed_cntl, mask, current_data_rate;
6819         int ret, i;
6820         u16 tmp16;
6821
6822         if (radeon_pcie_gen2 == 0)
6823                 return;
6824
6825         if (rdev->flags & RADEON_IS_IGP)
6826                 return;
6827
6828         if (!(rdev->flags & RADEON_IS_PCIE))
6829                 return;
6830
6831         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6832         if (ret != 0)
6833                 return;
6834
6835         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6836                 return;
6837
6838         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6839         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6840                 LC_CURRENT_DATA_RATE_SHIFT;
6841         if (mask & DRM_PCIE_SPEED_80) {
6842                 if (current_data_rate == 2) {
6843                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6844                         return;
6845                 }
6846                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6847         } else if (mask & DRM_PCIE_SPEED_50) {
6848                 if (current_data_rate == 1) {
6849                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6850                         return;
6851                 }
6852                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6853         }
6854
6855         bridge_pos = pci_pcie_cap(root);
6856         if (!bridge_pos)
6857                 return;
6858
6859         gpu_pos = pci_pcie_cap(rdev->pdev);
6860         if (!gpu_pos)
6861                 return;
6862
6863         if (mask & DRM_PCIE_SPEED_80) {
6864                 /* re-try equalization if gen3 is not already enabled */
6865                 if (current_data_rate != 2) {
6866                         u16 bridge_cfg, gpu_cfg;
6867                         u16 bridge_cfg2, gpu_cfg2;
6868                         u32 max_lw, current_lw, tmp;
6869
6870                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6871                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6872
6873                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
6874                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6875
6876                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
6877                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6878
6879                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
6880                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
6881                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
6882
6883                         if (current_lw < max_lw) {
6884                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6885                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
6886                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
6887                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
6888                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
6889                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
6890                                 }
6891                         }
6892
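                        /* retry link equalization a few times: quiesce the
                         * link, redo EQ, then restore the saved LNKCTL and
                         * LNKCTL2 settings on both the bridge and the GPU
                         */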
6893                         for (i = 0; i < 10; i++) {
6894                                 /* check status */
6895                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
6896                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
6897                                         break;
6898
6899                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6900                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6901
6902                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
6903                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
6904
6905                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6906                                 tmp |= LC_SET_QUIESCE;
6907                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6908
6909                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6910                                 tmp |= LC_REDO_EQ;
6911                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6912
6913                                 mdelay(100);
6914
6915                                 /* linkctl */
6916                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
6917                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6918                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
6919                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6920
6921                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
6922                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6923                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
6924                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6925
6926                                 /* linkctl2 */
6927                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
6928                                 tmp16 &= ~((1 << 4) | (7 << 9));
6929                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
6930                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
6931
6932                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
6933                                 tmp16 &= ~((1 << 4) | (7 << 9));
6934                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
6935                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
6936
6937                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6938                                 tmp &= ~LC_SET_QUIESCE;
6939                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6940                         }
6941                 }
6942         }
6943
6944         /* set the link speed */
6945         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
6946         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
6947         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
6948
6949         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
6950         tmp16 &= ~0xf; /* clear the target link speed */
6951         if (mask & DRM_PCIE_SPEED_80)
6952                 tmp16 |= 3; /* gen3 */
6953         else if (mask & DRM_PCIE_SPEED_50)
6954                 tmp16 |= 2; /* gen2 */
6955         else
6956                 tmp16 |= 1; /* gen1 */
6957         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
6958
6959         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6960         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
6961         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
6962
6963         for (i = 0; i < rdev->usec_timeout; i++) {
6964                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6965                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
6966                         break;
6967                 udelay(1);
6968         }
6969 }
6970
6971 static void si_program_aspm(struct radeon_device *rdev)
6972 {
6973         u32 data, orig;
6974         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
6975         bool disable_clkreq = false;
6976
6977         if (radeon_aspm == 0)
6978                 return;
6979
6980         if (!(rdev->flags & RADEON_IS_PCIE))
6981                 return;
6982
6983         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
6984         data &= ~LC_XMIT_N_FTS_MASK;
6985         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
6986         if (orig != data)
6987                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
6988
6989         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
6990         data |= LC_GO_TO_RECOVERY;
6991         if (orig != data)
6992                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
6993
6994         orig = data = RREG32_PCIE(PCIE_P_CNTL);
6995         data |= P_IGNORE_EDB_ERR;
6996         if (orig != data)
6997                 WREG32_PCIE(PCIE_P_CNTL, data);
6998
6999         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7000         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7001         data |= LC_PMI_TO_L1_DIS;
7002         if (!disable_l0s)
7003                 data |= LC_L0S_INACTIVITY(7);
7004
7005         if (!disable_l1) {
7006                 data |= LC_L1_INACTIVITY(7);
7007                 data &= ~LC_PMI_TO_L1_DIS;
7008                 if (orig != data)
7009                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7010
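                /* when PLL power-down in L1 is allowed, program the PLL
                 * power states and ramp-up times on both PIF PHYs
                 */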
7011                 if (!disable_plloff_in_l1) {
7012                         bool clk_req_support;
7013
7014                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7015                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7016                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7017                         if (orig != data)
7018                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7019
7020                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7021                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7022                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7023                         if (orig != data)
7024                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7025
7026                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7027                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7028                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7029                         if (orig != data)
7030                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7031
7032                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7033                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7034                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7035                         if (orig != data)
7036                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7037
7038                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7039                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7040                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7041                                 if (orig != data)
7042                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7043
7044                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7045                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7046                                 if (orig != data)
7047                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7048
7049                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7050                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7051                                 if (orig != data)
7052                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7053
7054                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7055                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7056                                 if (orig != data)
7057                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7058
7059                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7060                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7061                                 if (orig != data)
7062                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7063
7064                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7065                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7066                                 if (orig != data)
7067                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7068
7069                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7070                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7071                                 if (orig != data)
7072                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7073
7074                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7075                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7076                                 if (orig != data)
7077                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7078                         }
7079                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7080                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7081                         data |= LC_DYN_LANES_PWR_STATE(3);
7082                         if (orig != data)
7083                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7084
7085                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7086                         data &= ~LS2_EXIT_TIME_MASK;
7087                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7088                                 data |= LS2_EXIT_TIME(5);
7089                         if (orig != data)
7090                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7091
7092                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7093                         data &= ~LS2_EXIT_TIME_MASK;
7094                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7095                                 data |= LS2_EXIT_TIME(5);
7096                         if (orig != data)
7097                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7098
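                        /* only switch the misc clocks below when the upstream
                         * bridge advertises clock power management (CLKPM) in
                         * its link capabilities
                         */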
7099                         if (!disable_clkreq) {
7100                                 struct pci_dev *root = rdev->pdev->bus->self;
7101                                 u32 lnkcap;
7102
7103                                 clk_req_support = false;
7104                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7105                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7106                                         clk_req_support = true;
7107                         } else {
7108                                 clk_req_support = false;
7109                         }
7110
7111                         if (clk_req_support) {
7112                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7113                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7114                                 if (orig != data)
7115                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7116
7117                                 orig = data = RREG32(THM_CLK_CNTL);
7118                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7119                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7120                                 if (orig != data)
7121                                         WREG32(THM_CLK_CNTL, data);
7122
7123                                 orig = data = RREG32(MISC_CLK_CNTL);
7124                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7125                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7126                                 if (orig != data)
7127                                         WREG32(MISC_CLK_CNTL, data);
7128
7129                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7130                                 data &= ~BCLK_AS_XCLK;
7131                                 if (orig != data)
7132                                         WREG32(CG_CLKPIN_CNTL, data);
7133
7134                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7135                                 data &= ~FORCE_BIF_REFCLK_EN;
7136                                 if (orig != data)
7137                                         WREG32(CG_CLKPIN_CNTL_2, data);
7138
7139                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7140                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7141                                 data |= MPLL_CLKOUT_SEL(4);
7142                                 if (orig != data)
7143                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7144
7145                                 orig = data = RREG32(SPLL_CNTL_MODE);
7146                                 data &= ~SPLL_REFCLK_SEL_MASK;
7147                                 if (orig != data)
7148                                         WREG32(SPLL_CNTL_MODE, data);
7149                         }
7150                 }
7151         } else {
7152                 if (orig != data)
7153                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7154         }
7155
7156         orig = data = RREG32_PCIE(PCIE_CNTL2);
7157         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7158         if (orig != data)
7159                 WREG32_PCIE(PCIE_CNTL2, data);
7160
7161         if (!disable_l0s) {
7162                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7163                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7164                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7165                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7166                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7167                                 data &= ~LC_L0S_INACTIVITY_MASK;
7168                                 if (orig != data)
7169                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7170                         }
7171                 }
7172         }
7173 }