/*
 * Provenance: git.kernelconcepts.de, karo-tx-linux.git
 * blob 14472cca75ba3c4fe6b4a0f34c505f45556213d9
 * drivers/gpu/drm/radeon/si.c
 */
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63
64 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
65 extern void r600_ih_ring_fini(struct radeon_device *rdev);
66 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
70 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
71 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
72
73 static const u32 tahiti_golden_rlc_registers[] =
74 {
75         0xc424, 0xffffffff, 0x00601005,
76         0xc47c, 0xffffffff, 0x10104040,
77         0xc488, 0xffffffff, 0x0100000a,
78         0xc314, 0xffffffff, 0x00000800,
79         0xc30c, 0xffffffff, 0x800000f4,
80         0xf4a8, 0xffffffff, 0x00000000
81 };
82
83 static const u32 tahiti_golden_registers[] =
84 {
85         0x9a10, 0x00010000, 0x00018208,
86         0x9830, 0xffffffff, 0x00000000,
87         0x9834, 0xf00fffff, 0x00000400,
88         0x9838, 0x0002021c, 0x00020200,
89         0xc78, 0x00000080, 0x00000000,
90         0xd030, 0x000300c0, 0x00800040,
91         0xd830, 0x000300c0, 0x00800040,
92         0x5bb0, 0x000000f0, 0x00000070,
93         0x5bc0, 0x00200000, 0x50100000,
94         0x7030, 0x31000311, 0x00000011,
95         0x277c, 0x00000003, 0x000007ff,
96         0x240c, 0x000007ff, 0x00000000,
97         0x8a14, 0xf000001f, 0x00000007,
98         0x8b24, 0xffffffff, 0x00ffffff,
99         0x8b10, 0x0000ff0f, 0x00000000,
100         0x28a4c, 0x07ffffff, 0x4e000000,
101         0x28350, 0x3f3f3fff, 0x2a00126a,
102         0x30, 0x000000ff, 0x0040,
103         0x34, 0x00000040, 0x00004040,
104         0x9100, 0x07ffffff, 0x03000000,
105         0x8e88, 0x01ff1f3f, 0x00000000,
106         0x8e84, 0x01ff1f3f, 0x00000000,
107         0x9060, 0x0000007f, 0x00000020,
108         0x9508, 0x00010000, 0x00010000,
109         0xac14, 0x00000200, 0x000002fb,
110         0xac10, 0xffffffff, 0x0000543b,
111         0xac0c, 0xffffffff, 0xa9210876,
112         0x88d0, 0xffffffff, 0x000fff40,
113         0x88d4, 0x0000001f, 0x00000010,
114         0x1410, 0x20000000, 0x20fffed8,
115         0x15c0, 0x000c0fc0, 0x000c0400
116 };
117
118 static const u32 tahiti_golden_registers2[] =
119 {
120         0xc64, 0x00000001, 0x00000001
121 };
122
123 static const u32 pitcairn_golden_rlc_registers[] =
124 {
125         0xc424, 0xffffffff, 0x00601004,
126         0xc47c, 0xffffffff, 0x10102020,
127         0xc488, 0xffffffff, 0x01000020,
128         0xc314, 0xffffffff, 0x00000800,
129         0xc30c, 0xffffffff, 0x800000a4
130 };
131
132 static const u32 pitcairn_golden_registers[] =
133 {
134         0x9a10, 0x00010000, 0x00018208,
135         0x9830, 0xffffffff, 0x00000000,
136         0x9834, 0xf00fffff, 0x00000400,
137         0x9838, 0x0002021c, 0x00020200,
138         0xc78, 0x00000080, 0x00000000,
139         0xd030, 0x000300c0, 0x00800040,
140         0xd830, 0x000300c0, 0x00800040,
141         0x5bb0, 0x000000f0, 0x00000070,
142         0x5bc0, 0x00200000, 0x50100000,
143         0x7030, 0x31000311, 0x00000011,
144         0x2ae4, 0x00073ffe, 0x000022a2,
145         0x240c, 0x000007ff, 0x00000000,
146         0x8a14, 0xf000001f, 0x00000007,
147         0x8b24, 0xffffffff, 0x00ffffff,
148         0x8b10, 0x0000ff0f, 0x00000000,
149         0x28a4c, 0x07ffffff, 0x4e000000,
150         0x28350, 0x3f3f3fff, 0x2a00126a,
151         0x30, 0x000000ff, 0x0040,
152         0x34, 0x00000040, 0x00004040,
153         0x9100, 0x07ffffff, 0x03000000,
154         0x9060, 0x0000007f, 0x00000020,
155         0x9508, 0x00010000, 0x00010000,
156         0xac14, 0x000003ff, 0x000000f7,
157         0xac10, 0xffffffff, 0x00000000,
158         0xac0c, 0xffffffff, 0x32761054,
159         0x88d4, 0x0000001f, 0x00000010,
160         0x15c0, 0x000c0fc0, 0x000c0400
161 };
162
163 static const u32 verde_golden_rlc_registers[] =
164 {
165         0xc424, 0xffffffff, 0x033f1005,
166         0xc47c, 0xffffffff, 0x10808020,
167         0xc488, 0xffffffff, 0x00800008,
168         0xc314, 0xffffffff, 0x00001000,
169         0xc30c, 0xffffffff, 0x80010014
170 };
171
172 static const u32 verde_golden_registers[] =
173 {
174         0x9a10, 0x00010000, 0x00018208,
175         0x9830, 0xffffffff, 0x00000000,
176         0x9834, 0xf00fffff, 0x00000400,
177         0x9838, 0x0002021c, 0x00020200,
178         0xc78, 0x00000080, 0x00000000,
179         0xd030, 0x000300c0, 0x00800040,
180         0xd030, 0x000300c0, 0x00800040,
181         0xd830, 0x000300c0, 0x00800040,
182         0xd830, 0x000300c0, 0x00800040,
183         0x5bb0, 0x000000f0, 0x00000070,
184         0x5bc0, 0x00200000, 0x50100000,
185         0x7030, 0x31000311, 0x00000011,
186         0x2ae4, 0x00073ffe, 0x000022a2,
187         0x2ae4, 0x00073ffe, 0x000022a2,
188         0x2ae4, 0x00073ffe, 0x000022a2,
189         0x240c, 0x000007ff, 0x00000000,
190         0x240c, 0x000007ff, 0x00000000,
191         0x240c, 0x000007ff, 0x00000000,
192         0x8a14, 0xf000001f, 0x00000007,
193         0x8a14, 0xf000001f, 0x00000007,
194         0x8a14, 0xf000001f, 0x00000007,
195         0x8b24, 0xffffffff, 0x00ffffff,
196         0x8b10, 0x0000ff0f, 0x00000000,
197         0x28a4c, 0x07ffffff, 0x4e000000,
198         0x28350, 0x3f3f3fff, 0x0000124a,
199         0x28350, 0x3f3f3fff, 0x0000124a,
200         0x28350, 0x3f3f3fff, 0x0000124a,
201         0x30, 0x000000ff, 0x0040,
202         0x34, 0x00000040, 0x00004040,
203         0x9100, 0x07ffffff, 0x03000000,
204         0x9100, 0x07ffffff, 0x03000000,
205         0x8e88, 0x01ff1f3f, 0x00000000,
206         0x8e88, 0x01ff1f3f, 0x00000000,
207         0x8e88, 0x01ff1f3f, 0x00000000,
208         0x8e84, 0x01ff1f3f, 0x00000000,
209         0x8e84, 0x01ff1f3f, 0x00000000,
210         0x8e84, 0x01ff1f3f, 0x00000000,
211         0x9060, 0x0000007f, 0x00000020,
212         0x9508, 0x00010000, 0x00010000,
213         0xac14, 0x000003ff, 0x00000003,
214         0xac14, 0x000003ff, 0x00000003,
215         0xac14, 0x000003ff, 0x00000003,
216         0xac10, 0xffffffff, 0x00000000,
217         0xac10, 0xffffffff, 0x00000000,
218         0xac10, 0xffffffff, 0x00000000,
219         0xac0c, 0xffffffff, 0x00001032,
220         0xac0c, 0xffffffff, 0x00001032,
221         0xac0c, 0xffffffff, 0x00001032,
222         0x88d4, 0x0000001f, 0x00000010,
223         0x88d4, 0x0000001f, 0x00000010,
224         0x88d4, 0x0000001f, 0x00000010,
225         0x15c0, 0x000c0fc0, 0x000c0400
226 };
227
228 static const u32 oland_golden_rlc_registers[] =
229 {
230         0xc424, 0xffffffff, 0x00601005,
231         0xc47c, 0xffffffff, 0x10104040,
232         0xc488, 0xffffffff, 0x0100000a,
233         0xc314, 0xffffffff, 0x00000800,
234         0xc30c, 0xffffffff, 0x800000f4
235 };
236
237 static const u32 oland_golden_registers[] =
238 {
239         0x9a10, 0x00010000, 0x00018208,
240         0x9830, 0xffffffff, 0x00000000,
241         0x9834, 0xf00fffff, 0x00000400,
242         0x9838, 0x0002021c, 0x00020200,
243         0xc78, 0x00000080, 0x00000000,
244         0xd030, 0x000300c0, 0x00800040,
245         0xd830, 0x000300c0, 0x00800040,
246         0x5bb0, 0x000000f0, 0x00000070,
247         0x5bc0, 0x00200000, 0x50100000,
248         0x7030, 0x31000311, 0x00000011,
249         0x2ae4, 0x00073ffe, 0x000022a2,
250         0x240c, 0x000007ff, 0x00000000,
251         0x8a14, 0xf000001f, 0x00000007,
252         0x8b24, 0xffffffff, 0x00ffffff,
253         0x8b10, 0x0000ff0f, 0x00000000,
254         0x28a4c, 0x07ffffff, 0x4e000000,
255         0x28350, 0x3f3f3fff, 0x00000082,
256         0x30, 0x000000ff, 0x0040,
257         0x34, 0x00000040, 0x00004040,
258         0x9100, 0x07ffffff, 0x03000000,
259         0x9060, 0x0000007f, 0x00000020,
260         0x9508, 0x00010000, 0x00010000,
261         0xac14, 0x000003ff, 0x000000f3,
262         0xac10, 0xffffffff, 0x00000000,
263         0xac0c, 0xffffffff, 0x00003210,
264         0x88d4, 0x0000001f, 0x00000010,
265         0x15c0, 0x000c0fc0, 0x000c0400
266 };
267
268 static const u32 tahiti_mgcg_cgcg_init[] =
269 {
270         0xc400, 0xffffffff, 0xfffffffc,
271         0x802c, 0xffffffff, 0xe0000000,
272         0x9a60, 0xffffffff, 0x00000100,
273         0x92a4, 0xffffffff, 0x00000100,
274         0xc164, 0xffffffff, 0x00000100,
275         0x9774, 0xffffffff, 0x00000100,
276         0x8984, 0xffffffff, 0x06000100,
277         0x8a18, 0xffffffff, 0x00000100,
278         0x92a0, 0xffffffff, 0x00000100,
279         0xc380, 0xffffffff, 0x00000100,
280         0x8b28, 0xffffffff, 0x00000100,
281         0x9144, 0xffffffff, 0x00000100,
282         0x8d88, 0xffffffff, 0x00000100,
283         0x8d8c, 0xffffffff, 0x00000100,
284         0x9030, 0xffffffff, 0x00000100,
285         0x9034, 0xffffffff, 0x00000100,
286         0x9038, 0xffffffff, 0x00000100,
287         0x903c, 0xffffffff, 0x00000100,
288         0xad80, 0xffffffff, 0x00000100,
289         0xac54, 0xffffffff, 0x00000100,
290         0x897c, 0xffffffff, 0x06000100,
291         0x9868, 0xffffffff, 0x00000100,
292         0x9510, 0xffffffff, 0x00000100,
293         0xaf04, 0xffffffff, 0x00000100,
294         0xae04, 0xffffffff, 0x00000100,
295         0x949c, 0xffffffff, 0x00000100,
296         0x802c, 0xffffffff, 0xe0000000,
297         0x9160, 0xffffffff, 0x00010000,
298         0x9164, 0xffffffff, 0x00030002,
299         0x9168, 0xffffffff, 0x00040007,
300         0x916c, 0xffffffff, 0x00060005,
301         0x9170, 0xffffffff, 0x00090008,
302         0x9174, 0xffffffff, 0x00020001,
303         0x9178, 0xffffffff, 0x00040003,
304         0x917c, 0xffffffff, 0x00000007,
305         0x9180, 0xffffffff, 0x00060005,
306         0x9184, 0xffffffff, 0x00090008,
307         0x9188, 0xffffffff, 0x00030002,
308         0x918c, 0xffffffff, 0x00050004,
309         0x9190, 0xffffffff, 0x00000008,
310         0x9194, 0xffffffff, 0x00070006,
311         0x9198, 0xffffffff, 0x000a0009,
312         0x919c, 0xffffffff, 0x00040003,
313         0x91a0, 0xffffffff, 0x00060005,
314         0x91a4, 0xffffffff, 0x00000009,
315         0x91a8, 0xffffffff, 0x00080007,
316         0x91ac, 0xffffffff, 0x000b000a,
317         0x91b0, 0xffffffff, 0x00050004,
318         0x91b4, 0xffffffff, 0x00070006,
319         0x91b8, 0xffffffff, 0x0008000b,
320         0x91bc, 0xffffffff, 0x000a0009,
321         0x91c0, 0xffffffff, 0x000d000c,
322         0x91c4, 0xffffffff, 0x00060005,
323         0x91c8, 0xffffffff, 0x00080007,
324         0x91cc, 0xffffffff, 0x0000000b,
325         0x91d0, 0xffffffff, 0x000a0009,
326         0x91d4, 0xffffffff, 0x000d000c,
327         0x91d8, 0xffffffff, 0x00070006,
328         0x91dc, 0xffffffff, 0x00090008,
329         0x91e0, 0xffffffff, 0x0000000c,
330         0x91e4, 0xffffffff, 0x000b000a,
331         0x91e8, 0xffffffff, 0x000e000d,
332         0x91ec, 0xffffffff, 0x00080007,
333         0x91f0, 0xffffffff, 0x000a0009,
334         0x91f4, 0xffffffff, 0x0000000d,
335         0x91f8, 0xffffffff, 0x000c000b,
336         0x91fc, 0xffffffff, 0x000f000e,
337         0x9200, 0xffffffff, 0x00090008,
338         0x9204, 0xffffffff, 0x000b000a,
339         0x9208, 0xffffffff, 0x000c000f,
340         0x920c, 0xffffffff, 0x000e000d,
341         0x9210, 0xffffffff, 0x00110010,
342         0x9214, 0xffffffff, 0x000a0009,
343         0x9218, 0xffffffff, 0x000c000b,
344         0x921c, 0xffffffff, 0x0000000f,
345         0x9220, 0xffffffff, 0x000e000d,
346         0x9224, 0xffffffff, 0x00110010,
347         0x9228, 0xffffffff, 0x000b000a,
348         0x922c, 0xffffffff, 0x000d000c,
349         0x9230, 0xffffffff, 0x00000010,
350         0x9234, 0xffffffff, 0x000f000e,
351         0x9238, 0xffffffff, 0x00120011,
352         0x923c, 0xffffffff, 0x000c000b,
353         0x9240, 0xffffffff, 0x000e000d,
354         0x9244, 0xffffffff, 0x00000011,
355         0x9248, 0xffffffff, 0x0010000f,
356         0x924c, 0xffffffff, 0x00130012,
357         0x9250, 0xffffffff, 0x000d000c,
358         0x9254, 0xffffffff, 0x000f000e,
359         0x9258, 0xffffffff, 0x00100013,
360         0x925c, 0xffffffff, 0x00120011,
361         0x9260, 0xffffffff, 0x00150014,
362         0x9264, 0xffffffff, 0x000e000d,
363         0x9268, 0xffffffff, 0x0010000f,
364         0x926c, 0xffffffff, 0x00000013,
365         0x9270, 0xffffffff, 0x00120011,
366         0x9274, 0xffffffff, 0x00150014,
367         0x9278, 0xffffffff, 0x000f000e,
368         0x927c, 0xffffffff, 0x00110010,
369         0x9280, 0xffffffff, 0x00000014,
370         0x9284, 0xffffffff, 0x00130012,
371         0x9288, 0xffffffff, 0x00160015,
372         0x928c, 0xffffffff, 0x0010000f,
373         0x9290, 0xffffffff, 0x00120011,
374         0x9294, 0xffffffff, 0x00000015,
375         0x9298, 0xffffffff, 0x00140013,
376         0x929c, 0xffffffff, 0x00170016,
377         0x9150, 0xffffffff, 0x96940200,
378         0x8708, 0xffffffff, 0x00900100,
379         0xc478, 0xffffffff, 0x00000080,
380         0xc404, 0xffffffff, 0x0020003f,
381         0x30, 0xffffffff, 0x0000001c,
382         0x34, 0x000f0000, 0x000f0000,
383         0x160c, 0xffffffff, 0x00000100,
384         0x1024, 0xffffffff, 0x00000100,
385         0x102c, 0x00000101, 0x00000000,
386         0x20a8, 0xffffffff, 0x00000104,
387         0x264c, 0x000c0000, 0x000c0000,
388         0x2648, 0x000c0000, 0x000c0000,
389         0x55e4, 0xff000fff, 0x00000100,
390         0x55e8, 0x00000001, 0x00000001,
391         0x2f50, 0x00000001, 0x00000001,
392         0x30cc, 0xc0000fff, 0x00000104,
393         0xc1e4, 0x00000001, 0x00000001,
394         0xd0c0, 0xfffffff0, 0x00000100,
395         0xd8c0, 0xfffffff0, 0x00000100
396 };
397
398 static const u32 pitcairn_mgcg_cgcg_init[] =
399 {
400         0xc400, 0xffffffff, 0xfffffffc,
401         0x802c, 0xffffffff, 0xe0000000,
402         0x9a60, 0xffffffff, 0x00000100,
403         0x92a4, 0xffffffff, 0x00000100,
404         0xc164, 0xffffffff, 0x00000100,
405         0x9774, 0xffffffff, 0x00000100,
406         0x8984, 0xffffffff, 0x06000100,
407         0x8a18, 0xffffffff, 0x00000100,
408         0x92a0, 0xffffffff, 0x00000100,
409         0xc380, 0xffffffff, 0x00000100,
410         0x8b28, 0xffffffff, 0x00000100,
411         0x9144, 0xffffffff, 0x00000100,
412         0x8d88, 0xffffffff, 0x00000100,
413         0x8d8c, 0xffffffff, 0x00000100,
414         0x9030, 0xffffffff, 0x00000100,
415         0x9034, 0xffffffff, 0x00000100,
416         0x9038, 0xffffffff, 0x00000100,
417         0x903c, 0xffffffff, 0x00000100,
418         0xad80, 0xffffffff, 0x00000100,
419         0xac54, 0xffffffff, 0x00000100,
420         0x897c, 0xffffffff, 0x06000100,
421         0x9868, 0xffffffff, 0x00000100,
422         0x9510, 0xffffffff, 0x00000100,
423         0xaf04, 0xffffffff, 0x00000100,
424         0xae04, 0xffffffff, 0x00000100,
425         0x949c, 0xffffffff, 0x00000100,
426         0x802c, 0xffffffff, 0xe0000000,
427         0x9160, 0xffffffff, 0x00010000,
428         0x9164, 0xffffffff, 0x00030002,
429         0x9168, 0xffffffff, 0x00040007,
430         0x916c, 0xffffffff, 0x00060005,
431         0x9170, 0xffffffff, 0x00090008,
432         0x9174, 0xffffffff, 0x00020001,
433         0x9178, 0xffffffff, 0x00040003,
434         0x917c, 0xffffffff, 0x00000007,
435         0x9180, 0xffffffff, 0x00060005,
436         0x9184, 0xffffffff, 0x00090008,
437         0x9188, 0xffffffff, 0x00030002,
438         0x918c, 0xffffffff, 0x00050004,
439         0x9190, 0xffffffff, 0x00000008,
440         0x9194, 0xffffffff, 0x00070006,
441         0x9198, 0xffffffff, 0x000a0009,
442         0x919c, 0xffffffff, 0x00040003,
443         0x91a0, 0xffffffff, 0x00060005,
444         0x91a4, 0xffffffff, 0x00000009,
445         0x91a8, 0xffffffff, 0x00080007,
446         0x91ac, 0xffffffff, 0x000b000a,
447         0x91b0, 0xffffffff, 0x00050004,
448         0x91b4, 0xffffffff, 0x00070006,
449         0x91b8, 0xffffffff, 0x0008000b,
450         0x91bc, 0xffffffff, 0x000a0009,
451         0x91c0, 0xffffffff, 0x000d000c,
452         0x9200, 0xffffffff, 0x00090008,
453         0x9204, 0xffffffff, 0x000b000a,
454         0x9208, 0xffffffff, 0x000c000f,
455         0x920c, 0xffffffff, 0x000e000d,
456         0x9210, 0xffffffff, 0x00110010,
457         0x9214, 0xffffffff, 0x000a0009,
458         0x9218, 0xffffffff, 0x000c000b,
459         0x921c, 0xffffffff, 0x0000000f,
460         0x9220, 0xffffffff, 0x000e000d,
461         0x9224, 0xffffffff, 0x00110010,
462         0x9228, 0xffffffff, 0x000b000a,
463         0x922c, 0xffffffff, 0x000d000c,
464         0x9230, 0xffffffff, 0x00000010,
465         0x9234, 0xffffffff, 0x000f000e,
466         0x9238, 0xffffffff, 0x00120011,
467         0x923c, 0xffffffff, 0x000c000b,
468         0x9240, 0xffffffff, 0x000e000d,
469         0x9244, 0xffffffff, 0x00000011,
470         0x9248, 0xffffffff, 0x0010000f,
471         0x924c, 0xffffffff, 0x00130012,
472         0x9250, 0xffffffff, 0x000d000c,
473         0x9254, 0xffffffff, 0x000f000e,
474         0x9258, 0xffffffff, 0x00100013,
475         0x925c, 0xffffffff, 0x00120011,
476         0x9260, 0xffffffff, 0x00150014,
477         0x9150, 0xffffffff, 0x96940200,
478         0x8708, 0xffffffff, 0x00900100,
479         0xc478, 0xffffffff, 0x00000080,
480         0xc404, 0xffffffff, 0x0020003f,
481         0x30, 0xffffffff, 0x0000001c,
482         0x34, 0x000f0000, 0x000f0000,
483         0x160c, 0xffffffff, 0x00000100,
484         0x1024, 0xffffffff, 0x00000100,
485         0x102c, 0x00000101, 0x00000000,
486         0x20a8, 0xffffffff, 0x00000104,
487         0x55e4, 0xff000fff, 0x00000100,
488         0x55e8, 0x00000001, 0x00000001,
489         0x2f50, 0x00000001, 0x00000001,
490         0x30cc, 0xc0000fff, 0x00000104,
491         0xc1e4, 0x00000001, 0x00000001,
492         0xd0c0, 0xfffffff0, 0x00000100,
493         0xd8c0, 0xfffffff0, 0x00000100
494 };
495
496 static const u32 verde_mgcg_cgcg_init[] =
497 {
498         0xc400, 0xffffffff, 0xfffffffc,
499         0x802c, 0xffffffff, 0xe0000000,
500         0x9a60, 0xffffffff, 0x00000100,
501         0x92a4, 0xffffffff, 0x00000100,
502         0xc164, 0xffffffff, 0x00000100,
503         0x9774, 0xffffffff, 0x00000100,
504         0x8984, 0xffffffff, 0x06000100,
505         0x8a18, 0xffffffff, 0x00000100,
506         0x92a0, 0xffffffff, 0x00000100,
507         0xc380, 0xffffffff, 0x00000100,
508         0x8b28, 0xffffffff, 0x00000100,
509         0x9144, 0xffffffff, 0x00000100,
510         0x8d88, 0xffffffff, 0x00000100,
511         0x8d8c, 0xffffffff, 0x00000100,
512         0x9030, 0xffffffff, 0x00000100,
513         0x9034, 0xffffffff, 0x00000100,
514         0x9038, 0xffffffff, 0x00000100,
515         0x903c, 0xffffffff, 0x00000100,
516         0xad80, 0xffffffff, 0x00000100,
517         0xac54, 0xffffffff, 0x00000100,
518         0x897c, 0xffffffff, 0x06000100,
519         0x9868, 0xffffffff, 0x00000100,
520         0x9510, 0xffffffff, 0x00000100,
521         0xaf04, 0xffffffff, 0x00000100,
522         0xae04, 0xffffffff, 0x00000100,
523         0x949c, 0xffffffff, 0x00000100,
524         0x802c, 0xffffffff, 0xe0000000,
525         0x9160, 0xffffffff, 0x00010000,
526         0x9164, 0xffffffff, 0x00030002,
527         0x9168, 0xffffffff, 0x00040007,
528         0x916c, 0xffffffff, 0x00060005,
529         0x9170, 0xffffffff, 0x00090008,
530         0x9174, 0xffffffff, 0x00020001,
531         0x9178, 0xffffffff, 0x00040003,
532         0x917c, 0xffffffff, 0x00000007,
533         0x9180, 0xffffffff, 0x00060005,
534         0x9184, 0xffffffff, 0x00090008,
535         0x9188, 0xffffffff, 0x00030002,
536         0x918c, 0xffffffff, 0x00050004,
537         0x9190, 0xffffffff, 0x00000008,
538         0x9194, 0xffffffff, 0x00070006,
539         0x9198, 0xffffffff, 0x000a0009,
540         0x919c, 0xffffffff, 0x00040003,
541         0x91a0, 0xffffffff, 0x00060005,
542         0x91a4, 0xffffffff, 0x00000009,
543         0x91a8, 0xffffffff, 0x00080007,
544         0x91ac, 0xffffffff, 0x000b000a,
545         0x91b0, 0xffffffff, 0x00050004,
546         0x91b4, 0xffffffff, 0x00070006,
547         0x91b8, 0xffffffff, 0x0008000b,
548         0x91bc, 0xffffffff, 0x000a0009,
549         0x91c0, 0xffffffff, 0x000d000c,
550         0x9200, 0xffffffff, 0x00090008,
551         0x9204, 0xffffffff, 0x000b000a,
552         0x9208, 0xffffffff, 0x000c000f,
553         0x920c, 0xffffffff, 0x000e000d,
554         0x9210, 0xffffffff, 0x00110010,
555         0x9214, 0xffffffff, 0x000a0009,
556         0x9218, 0xffffffff, 0x000c000b,
557         0x921c, 0xffffffff, 0x0000000f,
558         0x9220, 0xffffffff, 0x000e000d,
559         0x9224, 0xffffffff, 0x00110010,
560         0x9228, 0xffffffff, 0x000b000a,
561         0x922c, 0xffffffff, 0x000d000c,
562         0x9230, 0xffffffff, 0x00000010,
563         0x9234, 0xffffffff, 0x000f000e,
564         0x9238, 0xffffffff, 0x00120011,
565         0x923c, 0xffffffff, 0x000c000b,
566         0x9240, 0xffffffff, 0x000e000d,
567         0x9244, 0xffffffff, 0x00000011,
568         0x9248, 0xffffffff, 0x0010000f,
569         0x924c, 0xffffffff, 0x00130012,
570         0x9250, 0xffffffff, 0x000d000c,
571         0x9254, 0xffffffff, 0x000f000e,
572         0x9258, 0xffffffff, 0x00100013,
573         0x925c, 0xffffffff, 0x00120011,
574         0x9260, 0xffffffff, 0x00150014,
575         0x9150, 0xffffffff, 0x96940200,
576         0x8708, 0xffffffff, 0x00900100,
577         0xc478, 0xffffffff, 0x00000080,
578         0xc404, 0xffffffff, 0x0020003f,
579         0x30, 0xffffffff, 0x0000001c,
580         0x34, 0x000f0000, 0x000f0000,
581         0x160c, 0xffffffff, 0x00000100,
582         0x1024, 0xffffffff, 0x00000100,
583         0x102c, 0x00000101, 0x00000000,
584         0x20a8, 0xffffffff, 0x00000104,
585         0x264c, 0x000c0000, 0x000c0000,
586         0x2648, 0x000c0000, 0x000c0000,
587         0x55e4, 0xff000fff, 0x00000100,
588         0x55e8, 0x00000001, 0x00000001,
589         0x2f50, 0x00000001, 0x00000001,
590         0x30cc, 0xc0000fff, 0x00000104,
591         0xc1e4, 0x00000001, 0x00000001,
592         0xd0c0, 0xfffffff0, 0x00000100,
593         0xd8c0, 0xfffffff0, 0x00000100
594 };
595
596 static const u32 oland_mgcg_cgcg_init[] =
597 {
598         0xc400, 0xffffffff, 0xfffffffc,
599         0x802c, 0xffffffff, 0xe0000000,
600         0x9a60, 0xffffffff, 0x00000100,
601         0x92a4, 0xffffffff, 0x00000100,
602         0xc164, 0xffffffff, 0x00000100,
603         0x9774, 0xffffffff, 0x00000100,
604         0x8984, 0xffffffff, 0x06000100,
605         0x8a18, 0xffffffff, 0x00000100,
606         0x92a0, 0xffffffff, 0x00000100,
607         0xc380, 0xffffffff, 0x00000100,
608         0x8b28, 0xffffffff, 0x00000100,
609         0x9144, 0xffffffff, 0x00000100,
610         0x8d88, 0xffffffff, 0x00000100,
611         0x8d8c, 0xffffffff, 0x00000100,
612         0x9030, 0xffffffff, 0x00000100,
613         0x9034, 0xffffffff, 0x00000100,
614         0x9038, 0xffffffff, 0x00000100,
615         0x903c, 0xffffffff, 0x00000100,
616         0xad80, 0xffffffff, 0x00000100,
617         0xac54, 0xffffffff, 0x00000100,
618         0x897c, 0xffffffff, 0x06000100,
619         0x9868, 0xffffffff, 0x00000100,
620         0x9510, 0xffffffff, 0x00000100,
621         0xaf04, 0xffffffff, 0x00000100,
622         0xae04, 0xffffffff, 0x00000100,
623         0x949c, 0xffffffff, 0x00000100,
624         0x802c, 0xffffffff, 0xe0000000,
625         0x9160, 0xffffffff, 0x00010000,
626         0x9164, 0xffffffff, 0x00030002,
627         0x9168, 0xffffffff, 0x00040007,
628         0x916c, 0xffffffff, 0x00060005,
629         0x9170, 0xffffffff, 0x00090008,
630         0x9174, 0xffffffff, 0x00020001,
631         0x9178, 0xffffffff, 0x00040003,
632         0x917c, 0xffffffff, 0x00000007,
633         0x9180, 0xffffffff, 0x00060005,
634         0x9184, 0xffffffff, 0x00090008,
635         0x9188, 0xffffffff, 0x00030002,
636         0x918c, 0xffffffff, 0x00050004,
637         0x9190, 0xffffffff, 0x00000008,
638         0x9194, 0xffffffff, 0x00070006,
639         0x9198, 0xffffffff, 0x000a0009,
640         0x919c, 0xffffffff, 0x00040003,
641         0x91a0, 0xffffffff, 0x00060005,
642         0x91a4, 0xffffffff, 0x00000009,
643         0x91a8, 0xffffffff, 0x00080007,
644         0x91ac, 0xffffffff, 0x000b000a,
645         0x91b0, 0xffffffff, 0x00050004,
646         0x91b4, 0xffffffff, 0x00070006,
647         0x91b8, 0xffffffff, 0x0008000b,
648         0x91bc, 0xffffffff, 0x000a0009,
649         0x91c0, 0xffffffff, 0x000d000c,
650         0x91c4, 0xffffffff, 0x00060005,
651         0x91c8, 0xffffffff, 0x00080007,
652         0x91cc, 0xffffffff, 0x0000000b,
653         0x91d0, 0xffffffff, 0x000a0009,
654         0x91d4, 0xffffffff, 0x000d000c,
655         0x9150, 0xffffffff, 0x96940200,
656         0x8708, 0xffffffff, 0x00900100,
657         0xc478, 0xffffffff, 0x00000080,
658         0xc404, 0xffffffff, 0x0020003f,
659         0x30, 0xffffffff, 0x0000001c,
660         0x34, 0x000f0000, 0x000f0000,
661         0x160c, 0xffffffff, 0x00000100,
662         0x1024, 0xffffffff, 0x00000100,
663         0x102c, 0x00000101, 0x00000000,
664         0x20a8, 0xffffffff, 0x00000104,
665         0x264c, 0x000c0000, 0x000c0000,
666         0x2648, 0x000c0000, 0x000c0000,
667         0x55e4, 0xff000fff, 0x00000100,
668         0x55e8, 0x00000001, 0x00000001,
669         0x2f50, 0x00000001, 0x00000001,
670         0x30cc, 0xc0000fff, 0x00000104,
671         0xc1e4, 0x00000001, 0x00000001,
672         0xd0c0, 0xfffffff0, 0x00000100,
673         0xd8c0, 0xfffffff0, 0x00000100
674 };
675
676 static u32 verde_pg_init[] =
677 {
678         0x353c, 0xffffffff, 0x40000,
679         0x3538, 0xffffffff, 0x200010ff,
680         0x353c, 0xffffffff, 0x0,
681         0x353c, 0xffffffff, 0x0,
682         0x353c, 0xffffffff, 0x0,
683         0x353c, 0xffffffff, 0x0,
684         0x353c, 0xffffffff, 0x0,
685         0x353c, 0xffffffff, 0x7007,
686         0x3538, 0xffffffff, 0x300010ff,
687         0x353c, 0xffffffff, 0x0,
688         0x353c, 0xffffffff, 0x0,
689         0x353c, 0xffffffff, 0x0,
690         0x353c, 0xffffffff, 0x0,
691         0x353c, 0xffffffff, 0x0,
692         0x353c, 0xffffffff, 0x400000,
693         0x3538, 0xffffffff, 0x100010ff,
694         0x353c, 0xffffffff, 0x0,
695         0x353c, 0xffffffff, 0x0,
696         0x353c, 0xffffffff, 0x0,
697         0x353c, 0xffffffff, 0x0,
698         0x353c, 0xffffffff, 0x0,
699         0x353c, 0xffffffff, 0x120200,
700         0x3538, 0xffffffff, 0x500010ff,
701         0x353c, 0xffffffff, 0x0,
702         0x353c, 0xffffffff, 0x0,
703         0x353c, 0xffffffff, 0x0,
704         0x353c, 0xffffffff, 0x0,
705         0x353c, 0xffffffff, 0x0,
706         0x353c, 0xffffffff, 0x1e1e16,
707         0x3538, 0xffffffff, 0x600010ff,
708         0x353c, 0xffffffff, 0x0,
709         0x353c, 0xffffffff, 0x0,
710         0x353c, 0xffffffff, 0x0,
711         0x353c, 0xffffffff, 0x0,
712         0x353c, 0xffffffff, 0x0,
713         0x353c, 0xffffffff, 0x171f1e,
714         0x3538, 0xffffffff, 0x700010ff,
715         0x353c, 0xffffffff, 0x0,
716         0x353c, 0xffffffff, 0x0,
717         0x353c, 0xffffffff, 0x0,
718         0x353c, 0xffffffff, 0x0,
719         0x353c, 0xffffffff, 0x0,
720         0x353c, 0xffffffff, 0x0,
721         0x3538, 0xffffffff, 0x9ff,
722         0x3500, 0xffffffff, 0x0,
723         0x3504, 0xffffffff, 0x10000800,
724         0x3504, 0xffffffff, 0xf,
725         0x3504, 0xffffffff, 0xf,
726         0x3500, 0xffffffff, 0x4,
727         0x3504, 0xffffffff, 0x1000051e,
728         0x3504, 0xffffffff, 0xffff,
729         0x3504, 0xffffffff, 0xffff,
730         0x3500, 0xffffffff, 0x8,
731         0x3504, 0xffffffff, 0x80500,
732         0x3500, 0xffffffff, 0x12,
733         0x3504, 0xffffffff, 0x9050c,
734         0x3500, 0xffffffff, 0x1d,
735         0x3504, 0xffffffff, 0xb052c,
736         0x3500, 0xffffffff, 0x2a,
737         0x3504, 0xffffffff, 0x1053e,
738         0x3500, 0xffffffff, 0x2d,
739         0x3504, 0xffffffff, 0x10546,
740         0x3500, 0xffffffff, 0x30,
741         0x3504, 0xffffffff, 0xa054e,
742         0x3500, 0xffffffff, 0x3c,
743         0x3504, 0xffffffff, 0x1055f,
744         0x3500, 0xffffffff, 0x3f,
745         0x3504, 0xffffffff, 0x10567,
746         0x3500, 0xffffffff, 0x42,
747         0x3504, 0xffffffff, 0x1056f,
748         0x3500, 0xffffffff, 0x45,
749         0x3504, 0xffffffff, 0x10572,
750         0x3500, 0xffffffff, 0x48,
751         0x3504, 0xffffffff, 0x20575,
752         0x3500, 0xffffffff, 0x4c,
753         0x3504, 0xffffffff, 0x190801,
754         0x3500, 0xffffffff, 0x67,
755         0x3504, 0xffffffff, 0x1082a,
756         0x3500, 0xffffffff, 0x6a,
757         0x3504, 0xffffffff, 0x1b082d,
758         0x3500, 0xffffffff, 0x87,
759         0x3504, 0xffffffff, 0x310851,
760         0x3500, 0xffffffff, 0xba,
761         0x3504, 0xffffffff, 0x891,
762         0x3500, 0xffffffff, 0xbc,
763         0x3504, 0xffffffff, 0x893,
764         0x3500, 0xffffffff, 0xbe,
765         0x3504, 0xffffffff, 0x20895,
766         0x3500, 0xffffffff, 0xc2,
767         0x3504, 0xffffffff, 0x20899,
768         0x3500, 0xffffffff, 0xc6,
769         0x3504, 0xffffffff, 0x2089d,
770         0x3500, 0xffffffff, 0xca,
771         0x3504, 0xffffffff, 0x8a1,
772         0x3500, 0xffffffff, 0xcc,
773         0x3504, 0xffffffff, 0x8a3,
774         0x3500, 0xffffffff, 0xce,
775         0x3504, 0xffffffff, 0x308a5,
776         0x3500, 0xffffffff, 0xd3,
777         0x3504, 0xffffffff, 0x6d08cd,
778         0x3500, 0xffffffff, 0x142,
779         0x3504, 0xffffffff, 0x2000095a,
780         0x3504, 0xffffffff, 0x1,
781         0x3500, 0xffffffff, 0x144,
782         0x3504, 0xffffffff, 0x301f095b,
783         0x3500, 0xffffffff, 0x165,
784         0x3504, 0xffffffff, 0xc094d,
785         0x3500, 0xffffffff, 0x173,
786         0x3504, 0xffffffff, 0xf096d,
787         0x3500, 0xffffffff, 0x184,
788         0x3504, 0xffffffff, 0x15097f,
789         0x3500, 0xffffffff, 0x19b,
790         0x3504, 0xffffffff, 0xc0998,
791         0x3500, 0xffffffff, 0x1a9,
792         0x3504, 0xffffffff, 0x409a7,
793         0x3500, 0xffffffff, 0x1af,
794         0x3504, 0xffffffff, 0xcdc,
795         0x3500, 0xffffffff, 0x1b1,
796         0x3504, 0xffffffff, 0x800,
797         0x3508, 0xffffffff, 0x6c9b2000,
798         0x3510, 0xfc00, 0x2000,
799         0x3544, 0xffffffff, 0xfc0,
800         0x28d4, 0x00000100, 0x100
801 };
802
803 static void si_init_golden_registers(struct radeon_device *rdev)
804 {
805         switch (rdev->family) {
806         case CHIP_TAHITI:
807                 radeon_program_register_sequence(rdev,
808                                                  tahiti_golden_registers,
809                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
810                 radeon_program_register_sequence(rdev,
811                                                  tahiti_golden_rlc_registers,
812                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
813                 radeon_program_register_sequence(rdev,
814                                                  tahiti_mgcg_cgcg_init,
815                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
816                 radeon_program_register_sequence(rdev,
817                                                  tahiti_golden_registers2,
818                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
819                 break;
820         case CHIP_PITCAIRN:
821                 radeon_program_register_sequence(rdev,
822                                                  pitcairn_golden_registers,
823                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
824                 radeon_program_register_sequence(rdev,
825                                                  pitcairn_golden_rlc_registers,
826                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
827                 radeon_program_register_sequence(rdev,
828                                                  pitcairn_mgcg_cgcg_init,
829                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
830                 break;
831         case CHIP_VERDE:
832                 radeon_program_register_sequence(rdev,
833                                                  verde_golden_registers,
834                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
835                 radeon_program_register_sequence(rdev,
836                                                  verde_golden_rlc_registers,
837                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
838                 radeon_program_register_sequence(rdev,
839                                                  verde_mgcg_cgcg_init,
840                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
841                 radeon_program_register_sequence(rdev,
842                                                  verde_pg_init,
843                                                  (const u32)ARRAY_SIZE(verde_pg_init));
844                 break;
845         case CHIP_OLAND:
846                 radeon_program_register_sequence(rdev,
847                                                  oland_golden_registers,
848                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
849                 radeon_program_register_sequence(rdev,
850                                                  oland_golden_rlc_registers,
851                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
852                 radeon_program_register_sequence(rdev,
853                                                  oland_mgcg_cgcg_init,
854                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
855                 break;
856         default:
857                 break;
858         }
859 }
860
861 #define PCIE_BUS_CLK                10000
862 #define TCLK                        (PCIE_BUS_CLK / 10)
863
864 /**
865  * si_get_xclk - get the xclk
866  *
867  * @rdev: radeon_device pointer
868  *
869  * Returns the reference clock used by the gfx engine
870  * (SI).
871  */
872 u32 si_get_xclk(struct radeon_device *rdev)
873 {
874         u32 reference_clock = rdev->clock.spll.reference_freq;
875         u32 tmp;
876
877         tmp = RREG32(CG_CLKPIN_CNTL_2);
878         if (tmp & MUX_TCLK_TO_XCLK)
879                 return TCLK;
880
881         tmp = RREG32(CG_CLKPIN_CNTL);
882         if (tmp & XTALIN_DIVIDE)
883                 return reference_clock / 4;
884
885         return reference_clock;
886 }
887
888 /* get temperature in millidegrees */
889 int si_get_temp(struct radeon_device *rdev)
890 {
891         u32 temp;
892         int actual_temp = 0;
893
894         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
895                 CTF_TEMP_SHIFT;
896
897         if (temp & 0x200)
898                 actual_temp = 255;
899         else
900                 actual_temp = temp & 0x1ff;
901
902         actual_temp = (actual_temp * 1000);
903
904         return actual_temp;
905 }
906
907 #define TAHITI_IO_MC_REGS_SIZE 36
908
/* MC IO debug index/data pairs programmed before loading the Tahiti MC ucode */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
947
/* MC IO debug index/data pairs for Pitcairn; only 0x9f differs from Tahiti */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
986
/* MC IO debug index/data pairs for Verde; only 0x9f differs from Tahiti */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1025
/* MC IO debug index/data pairs for Oland; only 0x9f differs from Tahiti */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1064
1065 /* ucode loading */
1066 static int si_mc_load_microcode(struct radeon_device *rdev)
1067 {
1068         const __be32 *fw_data;
1069         u32 running, blackout = 0;
1070         u32 *io_mc_regs;
1071         int i, ucode_size, regs_size;
1072
1073         if (!rdev->mc_fw)
1074                 return -EINVAL;
1075
1076         switch (rdev->family) {
1077         case CHIP_TAHITI:
1078                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1079                 ucode_size = SI_MC_UCODE_SIZE;
1080                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1081                 break;
1082         case CHIP_PITCAIRN:
1083                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1084                 ucode_size = SI_MC_UCODE_SIZE;
1085                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1086                 break;
1087         case CHIP_VERDE:
1088         default:
1089                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1090                 ucode_size = SI_MC_UCODE_SIZE;
1091                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1092                 break;
1093         case CHIP_OLAND:
1094                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1095                 ucode_size = OLAND_MC_UCODE_SIZE;
1096                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1097                 break;
1098         }
1099
1100         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1101
1102         if (running == 0) {
1103                 if (running) {
1104                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1105                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1106                 }
1107
1108                 /* reset the engine and set to writable */
1109                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1110                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1111
1112                 /* load mc io regs */
1113                 for (i = 0; i < regs_size; i++) {
1114                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1115                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1116                 }
1117                 /* load the MC ucode */
1118                 fw_data = (const __be32 *)rdev->mc_fw->data;
1119                 for (i = 0; i < ucode_size; i++)
1120                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1121
1122                 /* put the engine back into the active state */
1123                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1124                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1125                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1126
1127                 /* wait for training to complete */
1128                 for (i = 0; i < rdev->usec_timeout; i++) {
1129                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1130                                 break;
1131                         udelay(1);
1132                 }
1133                 for (i = 0; i < rdev->usec_timeout; i++) {
1134                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1135                                 break;
1136                         udelay(1);
1137                 }
1138
1139                 if (running)
1140                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1141         }
1142
1143         return 0;
1144 }
1145
1146 static int si_init_microcode(struct radeon_device *rdev)
1147 {
1148         struct platform_device *pdev;
1149         const char *chip_name;
1150         const char *rlc_chip_name;
1151         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1152         char fw_name[30];
1153         int err;
1154
1155         DRM_DEBUG("\n");
1156
1157         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1158         err = IS_ERR(pdev);
1159         if (err) {
1160                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1161                 return -EINVAL;
1162         }
1163
1164         switch (rdev->family) {
1165         case CHIP_TAHITI:
1166                 chip_name = "TAHITI";
1167                 rlc_chip_name = "TAHITI";
1168                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1169                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1170                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1171                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1172                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1173                 break;
1174         case CHIP_PITCAIRN:
1175                 chip_name = "PITCAIRN";
1176                 rlc_chip_name = "PITCAIRN";
1177                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1178                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1179                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1180                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1181                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1182                 break;
1183         case CHIP_VERDE:
1184                 chip_name = "VERDE";
1185                 rlc_chip_name = "VERDE";
1186                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1187                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1188                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1189                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1190                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1191                 break;
1192         case CHIP_OLAND:
1193                 chip_name = "OLAND";
1194                 rlc_chip_name = "OLAND";
1195                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1196                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1197                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1198                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1199                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1200                 break;
1201         default: BUG();
1202         }
1203
1204         DRM_INFO("Loading %s Microcode\n", chip_name);
1205
1206         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1207         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1208         if (err)
1209                 goto out;
1210         if (rdev->pfp_fw->size != pfp_req_size) {
1211                 printk(KERN_ERR
1212                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1213                        rdev->pfp_fw->size, fw_name);
1214                 err = -EINVAL;
1215                 goto out;
1216         }
1217
1218         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1219         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1220         if (err)
1221                 goto out;
1222         if (rdev->me_fw->size != me_req_size) {
1223                 printk(KERN_ERR
1224                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1225                        rdev->me_fw->size, fw_name);
1226                 err = -EINVAL;
1227         }
1228
1229         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1230         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1231         if (err)
1232                 goto out;
1233         if (rdev->ce_fw->size != ce_req_size) {
1234                 printk(KERN_ERR
1235                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1236                        rdev->ce_fw->size, fw_name);
1237                 err = -EINVAL;
1238         }
1239
1240         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1241         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1242         if (err)
1243                 goto out;
1244         if (rdev->rlc_fw->size != rlc_req_size) {
1245                 printk(KERN_ERR
1246                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1247                        rdev->rlc_fw->size, fw_name);
1248                 err = -EINVAL;
1249         }
1250
1251         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1252         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1253         if (err)
1254                 goto out;
1255         if (rdev->mc_fw->size != mc_req_size) {
1256                 printk(KERN_ERR
1257                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1258                        rdev->mc_fw->size, fw_name);
1259                 err = -EINVAL;
1260         }
1261
1262 out:
1263         platform_device_unregister(pdev);
1264
1265         if (err) {
1266                 if (err != -EINVAL)
1267                         printk(KERN_ERR
1268                                "si_cp: Failed to load firmware \"%s\"\n",
1269                                fw_name);
1270                 release_firmware(rdev->pfp_fw);
1271                 rdev->pfp_fw = NULL;
1272                 release_firmware(rdev->me_fw);
1273                 rdev->me_fw = NULL;
1274                 release_firmware(rdev->ce_fw);
1275                 rdev->ce_fw = NULL;
1276                 release_firmware(rdev->rlc_fw);
1277                 rdev->rlc_fw = NULL;
1278                 release_firmware(rdev->mc_fw);
1279                 rdev->mc_fw = NULL;
1280         }
1281         return err;
1282 }
1283
1284 /* watermark setup */
1285 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1286                                    struct radeon_crtc *radeon_crtc,
1287                                    struct drm_display_mode *mode,
1288                                    struct drm_display_mode *other_mode)
1289 {
1290         u32 tmp;
1291         /*
1292          * Line Buffer Setup
1293          * There are 3 line buffers, each one shared by 2 display controllers.
1294          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1295          * the display controllers.  The paritioning is done via one of four
1296          * preset allocations specified in bits 21:20:
1297          *  0 - half lb
1298          *  2 - whole lb, other crtc must be disabled
1299          */
1300         /* this can get tricky if we have two large displays on a paired group
1301          * of crtcs.  Ideally for multiple large displays we'd assign them to
1302          * non-linked crtcs for maximum line buffer allocation.
1303          */
1304         if (radeon_crtc->base.enabled && mode) {
1305                 if (other_mode)
1306                         tmp = 0; /* 1/2 */
1307                 else
1308                         tmp = 2; /* whole */
1309         } else
1310                 tmp = 0;
1311
1312         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1313                DC_LB_MEMORY_CONFIG(tmp));
1314
1315         if (radeon_crtc->base.enabled && mode) {
1316                 switch (tmp) {
1317                 case 0:
1318                 default:
1319                         return 4096 * 2;
1320                 case 2:
1321                         return 8192 * 2;
1322                 }
1323         }
1324
1325         /* controller not enabled, so no lb used */
1326         return 0;
1327 }
1328
1329 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1330 {
1331         u32 tmp = RREG32(MC_SHARED_CHMAP);
1332
1333         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1334         case 0:
1335         default:
1336                 return 1;
1337         case 1:
1338                 return 2;
1339         case 2:
1340                 return 4;
1341         case 3:
1342                 return 8;
1343         case 4:
1344                 return 3;
1345         case 5:
1346                 return 6;
1347         case 6:
1348                 return 10;
1349         case 7:
1350                 return 12;
1351         case 8:
1352                 return 16;
1353         }
1354 }
1355
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;   /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1371
1372 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1373 {
1374         /* Calculate raw DRAM Bandwidth */
1375         fixed20_12 dram_efficiency; /* 0.7 */
1376         fixed20_12 yclk, dram_channels, bandwidth;
1377         fixed20_12 a;
1378
1379         a.full = dfixed_const(1000);
1380         yclk.full = dfixed_const(wm->yclk);
1381         yclk.full = dfixed_div(yclk, a);
1382         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1383         a.full = dfixed_const(10);
1384         dram_efficiency.full = dfixed_const(7);
1385         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1386         bandwidth.full = dfixed_mul(dram_channels, yclk);
1387         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1388
1389         return dfixed_trunc(bandwidth);
1390 }
1391
1392 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1393 {
1394         /* Calculate DRAM Bandwidth and the part allocated to display. */
1395         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1396         fixed20_12 yclk, dram_channels, bandwidth;
1397         fixed20_12 a;
1398
1399         a.full = dfixed_const(1000);
1400         yclk.full = dfixed_const(wm->yclk);
1401         yclk.full = dfixed_div(yclk, a);
1402         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1403         a.full = dfixed_const(10);
1404         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1405         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1406         bandwidth.full = dfixed_mul(dram_channels, yclk);
1407         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1408
1409         return dfixed_trunc(bandwidth);
1410 }
1411
1412 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1413 {
1414         /* Calculate the display Data return Bandwidth */
1415         fixed20_12 return_efficiency; /* 0.8 */
1416         fixed20_12 sclk, bandwidth;
1417         fixed20_12 a;
1418
1419         a.full = dfixed_const(1000);
1420         sclk.full = dfixed_const(wm->sclk);
1421         sclk.full = dfixed_div(sclk, a);
1422         a.full = dfixed_const(10);
1423         return_efficiency.full = dfixed_const(8);
1424         return_efficiency.full = dfixed_div(return_efficiency, a);
1425         a.full = dfixed_const(32);
1426         bandwidth.full = dfixed_mul(a, sclk);
1427         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1428
1429         return dfixed_trunc(bandwidth);
1430 }
1431
/* DMIF requests are a fixed 32 bytes on DCE6 */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1436
1437 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1438 {
1439         /* Calculate the DMIF Request Bandwidth */
1440         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1441         fixed20_12 disp_clk, sclk, bandwidth;
1442         fixed20_12 a, b1, b2;
1443         u32 min_bandwidth;
1444
1445         a.full = dfixed_const(1000);
1446         disp_clk.full = dfixed_const(wm->disp_clk);
1447         disp_clk.full = dfixed_div(disp_clk, a);
1448         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1449         b1.full = dfixed_mul(a, disp_clk);
1450
1451         a.full = dfixed_const(1000);
1452         sclk.full = dfixed_const(wm->sclk);
1453         sclk.full = dfixed_div(sclk, a);
1454         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1455         b2.full = dfixed_mul(a, sclk);
1456
1457         a.full = dfixed_const(10);
1458         disp_clk_request_efficiency.full = dfixed_const(8);
1459         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1460
1461         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1462
1463         a.full = dfixed_const(min_bandwidth);
1464         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1465
1466         return dfixed_trunc(bandwidth);
1467 }
1468
1469 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1470 {
1471         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1472         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1473         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1474         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1475
1476         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1477 }
1478
1479 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1480 {
1481         /* Calculate the display mode Average Bandwidth
1482          * DisplayMode should contain the source and destination dimensions,
1483          * timing, etc.
1484          */
1485         fixed20_12 bpp;
1486         fixed20_12 line_time;
1487         fixed20_12 src_width;
1488         fixed20_12 bandwidth;
1489         fixed20_12 a;
1490
1491         a.full = dfixed_const(1000);
1492         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1493         line_time.full = dfixed_div(line_time, a);
1494         bpp.full = dfixed_const(wm->bytes_per_pixel);
1495         src_width.full = dfixed_const(wm->src_width);
1496         bandwidth.full = dfixed_mul(src_width, bpp);
1497         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1498         bandwidth.full = dfixed_div(bandwidth, line_time);
1499
1500         return dfixed_trunc(bandwidth);
1501 }
1502
/*
 * Compute the latency watermark (in ns) for one head: the memory/pipe
 * latency plus any extra time needed when the line buffer cannot be
 * filled within the active display period.
 * NOTE(review): available_bandwidth is used as a divisor before the
 * num_heads == 0 early-out; presumably it is never 0 here -- verify
 * against the callers.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time for a worst-case 512-byte x 8 chunk to come back */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* account for the other active heads competing for returns */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling/interlacing needs up to 4 source lines per
	 * destination line; otherwise 2 suffice */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk/1000 * bytes_per_pixel: pixel-rate-limited fill */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the tighter of the two limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the lb fills within the active period, the base latency
	 * stands; otherwise pad it by the shortfall */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1565
1566 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1567 {
1568         if (dce6_average_bandwidth(wm) <=
1569             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1570                 return true;
1571         else
1572                 return false;
1573 };
1574
1575 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1576 {
1577         if (dce6_average_bandwidth(wm) <=
1578             (dce6_available_bandwidth(wm) / wm->num_heads))
1579                 return true;
1580         else
1581                 return false;
1582 };
1583
1584 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1585 {
1586         u32 lb_partitions = wm->lb_size / wm->src_width;
1587         u32 line_time = wm->active_time + wm->blank_time;
1588         u32 latency_tolerant_lines;
1589         u32 latency_hiding;
1590         fixed20_12 a;
1591
1592         a.full = dfixed_const(1);
1593         if (wm->vsc.full > a.full)
1594                 latency_tolerant_lines = 1;
1595         else {
1596                 if (lb_partitions <= (wm->vtaps + 1))
1597                         latency_tolerant_lines = 1;
1598                 else
1599                         latency_tolerant_lines = 2;
1600         }
1601
1602         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1603
1604         if (dce6_latency_watermark(wm) <= latency_hiding)
1605                 return true;
1606         else
1607                 return false;
1608 }
1609
/**
 * dce6_program_watermarks - program display watermarks for a crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer allocation for this crtc
 * @num_heads: number of active display heads
 *
 * Calculates latency watermarks and priority marks for the crtc's
 * current mode and writes them to the DPG arbitration/latency and
 * priority registers.  When the crtc is disabled (or no heads are
 * active) the priority counters are left at PRIORITY_OFF.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		/* NOTE(review): wm is not actually switched to the low
		 * clocks here, so watermark b currently equals a */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark a:
		 * latency_watermark_a * (clock/1000) * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark b: same computation with watermark b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1719
1720 void dce6_bandwidth_update(struct radeon_device *rdev)
1721 {
1722         struct drm_display_mode *mode0 = NULL;
1723         struct drm_display_mode *mode1 = NULL;
1724         u32 num_heads = 0, lb_size;
1725         int i;
1726
1727         radeon_update_display_priority(rdev);
1728
1729         for (i = 0; i < rdev->num_crtc; i++) {
1730                 if (rdev->mode_info.crtcs[i]->base.enabled)
1731                         num_heads++;
1732         }
1733         for (i = 0; i < rdev->num_crtc; i += 2) {
1734                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1735                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1736                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1737                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1738                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1739                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1740         }
1741 }
1742
1743 /*
1744  * Core functions
1745  */
1746 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1747 {
1748         const u32 num_tile_mode_states = 32;
1749         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1750
1751         switch (rdev->config.si.mem_row_size_in_kb) {
1752         case 1:
1753                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1754                 break;
1755         case 2:
1756         default:
1757                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1758                 break;
1759         case 4:
1760                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1761                 break;
1762         }
1763
1764         if ((rdev->family == CHIP_TAHITI) ||
1765             (rdev->family == CHIP_PITCAIRN)) {
1766                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767                         switch (reg_offset) {
1768                         case 0:  /* non-AA compressed depth or any compressed stencil */
1769                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1771                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1772                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1773                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1774                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1776                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1777                                 break;
1778                         case 1:  /* 2xAA/4xAA compressed depth only */
1779                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1781                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1782                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1783                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1784                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1785                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1786                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1787                                 break;
1788                         case 2:  /* 8xAA compressed depth only */
1789                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1791                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1792                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1793                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1794                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1797                                 break;
1798                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1799                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1800                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1801                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1802                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1803                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1804                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1806                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1807                                 break;
1808                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1809                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1810                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1811                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1812                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1813                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1814                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1815                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1816                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1817                                 break;
1818                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1820                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1821                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1822                                                  TILE_SPLIT(split_equal_to_row_size) |
1823                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1824                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1825                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1826                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1827                                 break;
1828                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1831                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832                                                  TILE_SPLIT(split_equal_to_row_size) |
1833                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1834                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1837                                 break;
1838                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1839                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1840                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1841                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1842                                                  TILE_SPLIT(split_equal_to_row_size) |
1843                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1844                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1845                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1846                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1847                                 break;
1848                         case 8:  /* 1D and 1D Array Surfaces */
1849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1850                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1851                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1852                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1854                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1855                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1856                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1857                                 break;
1858                         case 9:  /* Displayable maps. */
1859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1860                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1861                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1862                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1863                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1864                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1865                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1866                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1867                                 break;
1868                         case 10:  /* Display 8bpp. */
1869                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1871                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1872                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1873                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1874                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1875                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1876                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1877                                 break;
1878                         case 11:  /* Display 16bpp. */
1879                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1880                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1881                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1882                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1883                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1884                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1885                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1886                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1887                                 break;
1888                         case 12:  /* Display 32bpp. */
1889                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1890                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1892                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1893                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1894                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1896                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1897                                 break;
1898                         case 13:  /* Thin. */
1899                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1900                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1901                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1902                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1903                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1904                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1906                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1907                                 break;
1908                         case 14:  /* Thin 8 bpp. */
1909                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1910                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1911                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1912                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1913                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1914                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1917                                 break;
1918                         case 15:  /* Thin 16 bpp. */
1919                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1920                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1921                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1922                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1923                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1924                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1926                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1927                                 break;
1928                         case 16:  /* Thin 32 bpp. */
1929                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1930                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1931                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1932                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1933                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1934                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1936                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1937                                 break;
1938                         case 17:  /* Thin 64 bpp. */
1939                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1941                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1942                                                  TILE_SPLIT(split_equal_to_row_size) |
1943                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1944                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1947                                 break;
1948                         case 21:  /* 8 bpp PRT. */
1949                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1950                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1951                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1952                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1953                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1954                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1955                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1956                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1957                                 break;
1958                         case 22:  /* 16 bpp PRT */
1959                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1960                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1961                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1962                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1963                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1964                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1967                                 break;
1968                         case 23:  /* 32 bpp PRT */
1969                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1970                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1971                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1972                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1973                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1974                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1977                                 break;
1978                         case 24:  /* 64 bpp PRT */
1979                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1981                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1982                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1983                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1984                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1986                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1987                                 break;
1988                         case 25:  /* 128 bpp PRT */
1989                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1990                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1991                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1992                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1993                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
1994                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1995                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1996                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1997                                 break;
1998                         default:
1999                                 gb_tile_moden = 0;
2000                                 break;
2001                         }
2002                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2003                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2004                 }
2005         } else if ((rdev->family == CHIP_VERDE) ||
2006                    (rdev->family == CHIP_OLAND) ||
2007                    (rdev->family == CHIP_HAINAN)) {
2008                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2009                         switch (reg_offset) {
2010                         case 0:  /* non-AA compressed depth or any compressed stencil */
2011                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2012                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2013                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2014                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2015                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2016                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2017                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2018                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2019                                 break;
2020                         case 1:  /* 2xAA/4xAA compressed depth only */
2021                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2022                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2023                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2024                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2025                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2026                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2027                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2028                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2029                                 break;
2030                         case 2:  /* 8xAA compressed depth only */
2031                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2032                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2033                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2034                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2035                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2036                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2037                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2038                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2039                                 break;
2040                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2041                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2043                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2044                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2045                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2046                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2047                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2048                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2049                                 break;
2050                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2051                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2053                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2054                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2055                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2056                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2057                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2058                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2059                                 break;
2060                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2061                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2062                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2063                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2064                                                  TILE_SPLIT(split_equal_to_row_size) |
2065                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2066                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2067                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2068                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2069                                 break;
2070                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2071                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2072                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2073                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2074                                                  TILE_SPLIT(split_equal_to_row_size) |
2075                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2076                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2077                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2078                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2079                                 break;
2080                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2081                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2082                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2083                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2084                                                  TILE_SPLIT(split_equal_to_row_size) |
2085                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2086                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2087                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2088                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2089                                 break;
2090                         case 8:  /* 1D and 1D Array Surfaces */
2091                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2092                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2093                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2094                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2095                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2096                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2097                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2098                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2099                                 break;
2100                         case 9:  /* Displayable maps. */
2101                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2102                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2103                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2104                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2105                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2106                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2107                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2108                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2109                                 break;
2110                         case 10:  /* Display 8bpp. */
2111                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2113                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2114                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2115                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2116                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2117                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2118                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2119                                 break;
2120                         case 11:  /* Display 16bpp. */
2121                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2122                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2123                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2124                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2125                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2126                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2127                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2128                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2129                                 break;
2130                         case 12:  /* Display 32bpp. */
2131                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2132                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2133                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2134                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2135                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2136                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2137                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2138                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2139                                 break;
2140                         case 13:  /* Thin. */
2141                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2142                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2143                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2144                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2145                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2146                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2148                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2149                                 break;
2150                         case 14:  /* Thin 8 bpp. */
2151                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2153                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2155                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2156                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2158                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2159                                 break;
2160                         case 15:  /* Thin 16 bpp. */
2161                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2163                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2164                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2165                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2166                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2168                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2169                                 break;
2170                         case 16:  /* Thin 32 bpp. */
2171                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2173                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2175                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2176                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2178                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2179                                 break;
2180                         case 17:  /* Thin 64 bpp. */
2181                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2183                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2184                                                  TILE_SPLIT(split_equal_to_row_size) |
2185                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2186                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2189                                 break;
2190                         case 21:  /* 8 bpp PRT. */
2191                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2192                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2193                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2194                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2195                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2196                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2197                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2198                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2199                                 break;
2200                         case 22:  /* 16 bpp PRT */
2201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2203                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2204                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2205                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2206                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2208                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2209                                 break;
2210                         case 23:  /* 32 bpp PRT */
2211                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2213                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2214                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2215                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2216                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2217                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2218                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2219                                 break;
2220                         case 24:  /* 64 bpp PRT */
2221                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2222                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2223                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2224                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2225                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2226                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2227                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2228                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2229                                 break;
2230                         case 25:  /* 128 bpp PRT */
2231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2233                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2234                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2235                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2236                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2239                                 break;
2240                         default:
2241                                 gb_tile_moden = 0;
2242                                 break;
2243                         }
2244                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2245                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2246                 }
2247         } else
2248                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2249 }
2250
2251 static void si_select_se_sh(struct radeon_device *rdev,
2252                             u32 se_num, u32 sh_num)
2253 {
2254         u32 data = INSTANCE_BROADCAST_WRITES;
2255
2256         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2257                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2258         else if (se_num == 0xffffffff)
2259                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2260         else if (sh_num == 0xffffffff)
2261                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2262         else
2263                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2264         WREG32(GRBM_GFX_INDEX, data);
2265 }
2266
/*
 * si_create_bitmask - build a mask with the low @bit_width bits set
 *
 * E.g. bit_width = 3 returns 0x7; bit_width = 0 returns 0.
 * Widths of 32 or more saturate to 0xffffffff, same as the original
 * one-bit-at-a-time construction (no out-of-range shift occurs).
 */
static u32 si_create_bitmask(u32 bit_width)
{
	u32 mask = 0;

	while (bit_width--)
		mask = (mask << 1) | 1;

	return mask;
}
2277
2278 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2279 {
2280         u32 data, mask;
2281
2282         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2283         if (data & 1)
2284                 data &= INACTIVE_CUS_MASK;
2285         else
2286                 data = 0;
2287         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2288
2289         data >>= INACTIVE_CUS_SHIFT;
2290
2291         mask = si_create_bitmask(cu_per_sh);
2292
2293         return ~data & mask;
2294 }
2295
2296 static void si_setup_spi(struct radeon_device *rdev,
2297                          u32 se_num, u32 sh_per_se,
2298                          u32 cu_per_sh)
2299 {
2300         int i, j, k;
2301         u32 data, mask, active_cu;
2302
2303         for (i = 0; i < se_num; i++) {
2304                 for (j = 0; j < sh_per_se; j++) {
2305                         si_select_se_sh(rdev, i, j);
2306                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2307                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2308
2309                         mask = 1;
2310                         for (k = 0; k < 16; k++) {
2311                                 mask <<= k;
2312                                 if (active_cu & mask) {
2313                                         data &= ~mask;
2314                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2315                                         break;
2316                                 }
2317                         }
2318                 }
2319         }
2320         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2321 }
2322
2323 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2324                               u32 max_rb_num, u32 se_num,
2325                               u32 sh_per_se)
2326 {
2327         u32 data, mask;
2328
2329         data = RREG32(CC_RB_BACKEND_DISABLE);
2330         if (data & 1)
2331                 data &= BACKEND_DISABLE_MASK;
2332         else
2333                 data = 0;
2334         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2335
2336         data >>= BACKEND_DISABLE_SHIFT;
2337
2338         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2339
2340         return data & mask;
2341 }
2342
/*
 * si_setup_rb - program the rasterizer's render-backend mapping
 *
 * Gathers the disabled-RB state from every shader array, derives the
 * set of enabled RBs, and writes a per-SE PA_SC_RASTER_CONFIG value
 * selecting an RB map for each shader array.  Broadcast indexing is
 * restored before returning.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: collect the disabled-RB bits of every SH into one word,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Pass 2: invert - enabled_rbs gets a set bit for each of the
	 * max_rb_num RBs that is NOT disabled. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Pass 3: consume enabled_rbs two bits (one RB pair) per shader
	 * array and pick an RB map field for PA_SC_RASTER_CONFIG. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled (default also covers 0) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2390
2391 static void si_gpu_init(struct radeon_device *rdev)
2392 {
2393         u32 gb_addr_config = 0;
2394         u32 mc_shared_chmap, mc_arb_ramcfg;
2395         u32 sx_debug_1;
2396         u32 hdp_host_path_cntl;
2397         u32 tmp;
2398         int i, j;
2399
2400         switch (rdev->family) {
2401         case CHIP_TAHITI:
2402                 rdev->config.si.max_shader_engines = 2;
2403                 rdev->config.si.max_tile_pipes = 12;
2404                 rdev->config.si.max_cu_per_sh = 8;
2405                 rdev->config.si.max_sh_per_se = 2;
2406                 rdev->config.si.max_backends_per_se = 4;
2407                 rdev->config.si.max_texture_channel_caches = 12;
2408                 rdev->config.si.max_gprs = 256;
2409                 rdev->config.si.max_gs_threads = 32;
2410                 rdev->config.si.max_hw_contexts = 8;
2411
2412                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2413                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2414                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2415                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2416                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2417                 break;
2418         case CHIP_PITCAIRN:
2419                 rdev->config.si.max_shader_engines = 2;
2420                 rdev->config.si.max_tile_pipes = 8;
2421                 rdev->config.si.max_cu_per_sh = 5;
2422                 rdev->config.si.max_sh_per_se = 2;
2423                 rdev->config.si.max_backends_per_se = 4;
2424                 rdev->config.si.max_texture_channel_caches = 8;
2425                 rdev->config.si.max_gprs = 256;
2426                 rdev->config.si.max_gs_threads = 32;
2427                 rdev->config.si.max_hw_contexts = 8;
2428
2429                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2430                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2431                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2432                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2433                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2434                 break;
2435         case CHIP_VERDE:
2436         default:
2437                 rdev->config.si.max_shader_engines = 1;
2438                 rdev->config.si.max_tile_pipes = 4;
2439                 rdev->config.si.max_cu_per_sh = 2;
2440                 rdev->config.si.max_sh_per_se = 2;
2441                 rdev->config.si.max_backends_per_se = 4;
2442                 rdev->config.si.max_texture_channel_caches = 4;
2443                 rdev->config.si.max_gprs = 256;
2444                 rdev->config.si.max_gs_threads = 32;
2445                 rdev->config.si.max_hw_contexts = 8;
2446
2447                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2448                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2449                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2450                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2451                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2452                 break;
2453         case CHIP_OLAND:
2454                 rdev->config.si.max_shader_engines = 1;
2455                 rdev->config.si.max_tile_pipes = 4;
2456                 rdev->config.si.max_cu_per_sh = 6;
2457                 rdev->config.si.max_sh_per_se = 1;
2458                 rdev->config.si.max_backends_per_se = 2;
2459                 rdev->config.si.max_texture_channel_caches = 4;
2460                 rdev->config.si.max_gprs = 256;
2461                 rdev->config.si.max_gs_threads = 16;
2462                 rdev->config.si.max_hw_contexts = 8;
2463
2464                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2465                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2466                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2467                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2468                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2469                 break;
2470         case CHIP_HAINAN:
2471                 rdev->config.si.max_shader_engines = 1;
2472                 rdev->config.si.max_tile_pipes = 4;
2473                 rdev->config.si.max_cu_per_sh = 5;
2474                 rdev->config.si.max_sh_per_se = 1;
2475                 rdev->config.si.max_backends_per_se = 1;
2476                 rdev->config.si.max_texture_channel_caches = 2;
2477                 rdev->config.si.max_gprs = 256;
2478                 rdev->config.si.max_gs_threads = 16;
2479                 rdev->config.si.max_hw_contexts = 8;
2480
2481                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2482                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2483                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2484                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2485                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2486                 break;
2487         }
2488
2489         /* Initialize HDP */
2490         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2491                 WREG32((0x2c14 + j), 0x00000000);
2492                 WREG32((0x2c18 + j), 0x00000000);
2493                 WREG32((0x2c1c + j), 0x00000000);
2494                 WREG32((0x2c20 + j), 0x00000000);
2495                 WREG32((0x2c24 + j), 0x00000000);
2496         }
2497
2498         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2499
2500         evergreen_fix_pci_max_read_req_size(rdev);
2501
2502         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2503
2504         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2505         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2506
2507         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2508         rdev->config.si.mem_max_burst_length_bytes = 256;
2509         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2510         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2511         if (rdev->config.si.mem_row_size_in_kb > 4)
2512                 rdev->config.si.mem_row_size_in_kb = 4;
2513         /* XXX use MC settings? */
2514         rdev->config.si.shader_engine_tile_size = 32;
2515         rdev->config.si.num_gpus = 1;
2516         rdev->config.si.multi_gpu_tile_size = 64;
2517
2518         /* fix up row size */
2519         gb_addr_config &= ~ROW_SIZE_MASK;
2520         switch (rdev->config.si.mem_row_size_in_kb) {
2521         case 1:
2522         default:
2523                 gb_addr_config |= ROW_SIZE(0);
2524                 break;
2525         case 2:
2526                 gb_addr_config |= ROW_SIZE(1);
2527                 break;
2528         case 4:
2529                 gb_addr_config |= ROW_SIZE(2);
2530                 break;
2531         }
2532
2533         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2534          * not have bank info, so create a custom tiling dword.
2535          * bits 3:0   num_pipes
2536          * bits 7:4   num_banks
2537          * bits 11:8  group_size
2538          * bits 15:12 row_size
2539          */
2540         rdev->config.si.tile_config = 0;
2541         switch (rdev->config.si.num_tile_pipes) {
2542         case 1:
2543                 rdev->config.si.tile_config |= (0 << 0);
2544                 break;
2545         case 2:
2546                 rdev->config.si.tile_config |= (1 << 0);
2547                 break;
2548         case 4:
2549                 rdev->config.si.tile_config |= (2 << 0);
2550                 break;
2551         case 8:
2552         default:
2553                 /* XXX what about 12? */
2554                 rdev->config.si.tile_config |= (3 << 0);
2555                 break;
2556         }       
2557         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
2558         case 0: /* four banks */
2559                 rdev->config.si.tile_config |= 0 << 4;
2560                 break;
2561         case 1: /* eight banks */
2562                 rdev->config.si.tile_config |= 1 << 4;
2563                 break;
2564         case 2: /* sixteen banks */
2565         default:
2566                 rdev->config.si.tile_config |= 2 << 4;
2567                 break;
2568         }
2569         rdev->config.si.tile_config |=
2570                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2571         rdev->config.si.tile_config |=
2572                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2573
2574         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2575         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2576         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2577         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2578         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
2579         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2580         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2581         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2582         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2583
2584         si_tiling_mode_table_init(rdev);
2585
2586         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
2587                     rdev->config.si.max_sh_per_se,
2588                     rdev->config.si.max_backends_per_se);
2589
2590         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
2591                      rdev->config.si.max_sh_per_se,
2592                      rdev->config.si.max_cu_per_sh);
2593
2594
2595         /* set HW defaults for 3D engine */
2596         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
2597                                      ROQ_IB2_START(0x2b)));
2598         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2599
2600         sx_debug_1 = RREG32(SX_DEBUG_1);
2601         WREG32(SX_DEBUG_1, sx_debug_1);
2602
2603         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2604
2605         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
2606                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
2607                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
2608                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
2609
2610         WREG32(VGT_NUM_INSTANCES, 1);
2611
2612         WREG32(CP_PERFMON_CNTL, 0);
2613
2614         WREG32(SQ_CONFIG, 0);
2615
2616         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2617                                           FORCE_EOV_MAX_REZ_CNT(255)));
2618
2619         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2620                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2621
2622         WREG32(VGT_GS_VERTEX_REUSE, 16);
2623         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2624
2625         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
2626         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
2627         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
2628         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
2629         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
2630         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
2631         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
2632         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
2633
2634         tmp = RREG32(HDP_MISC_CNTL);
2635         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2636         WREG32(HDP_MISC_CNTL, tmp);
2637
2638         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2639         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2640
2641         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2642
2643         udelay(50);
2644 }
2645
2646 /*
2647  * GPU scratch registers helpers function.
2648  */
2649 static void si_scratch_init(struct radeon_device *rdev)
2650 {
2651         int i;
2652
2653         rdev->scratch.num_reg = 7;
2654         rdev->scratch.reg_base = SCRATCH_REG0;
2655         for (i = 0; i < rdev->scratch.num_reg; i++) {
2656                 rdev->scratch.free[i] = true;
2657                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2658         }
2659 }
2660
/**
 * si_fence_ring_emit - emit a fence sequence number on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: radeon fence object carrying the ring index and sequence number
 *
 * Flushes the GPU read caches over GART, then emits an EVENT_WRITE_EOP
 * packet that writes @fence->seq to the fence driver's GPU address for
 * this ring and signals an interrupt.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	/* invalidate L1 texture, texture, and shader instruction/constant caches */
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);		/* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);	/* fence address, low 32 bits */
	/* upper address bits plus data/interrupt select fields */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);	/* value written at the fence address */
	radeon_ring_write(ring, 0);
}
2687
2688 /*
2689  * IB stuff
2690  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to execute
 *
 * Emits an INDIRECT_BUFFER packet (INDIRECT_BUFFER_CONST for const IBs,
 * preceded by a SWITCH_BUFFER packet) pointing at @ib.  For regular IBs
 * it optionally records the post-IB read pointer (to a scratch register
 * or the writeback buffer) and flushes the read caches over GART for
 * the IB's VM id afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* expected rptr after: this packet (3 dwords) +
			 * the IB packet (4) + the cache flush below (8) */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* WRITE_DATA packet is 5 dwords; + 4 + 8 as above */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB base, dword aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* IB size in dwords; VM id goes into bits 31:24 when a VM is bound */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
2747
2748 /*
2749  * CP.
2750  */
2751 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2752 {
2753         if (enable)
2754                 WREG32(CP_ME_CNTL, 0);
2755         else {
2756                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2757                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2758                 WREG32(SCRATCH_UMSK, 0);
2759                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2760                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2761                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2762         }
2763         udelay(50);
2764 }
2765
2766 static int si_cp_load_microcode(struct radeon_device *rdev)
2767 {
2768         const __be32 *fw_data;
2769         int i;
2770
2771         if (!rdev->me_fw || !rdev->pfp_fw)
2772                 return -EINVAL;
2773
2774         si_cp_enable(rdev, false);
2775
2776         /* PFP */
2777         fw_data = (const __be32 *)rdev->pfp_fw->data;
2778         WREG32(CP_PFP_UCODE_ADDR, 0);
2779         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2780                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2781         WREG32(CP_PFP_UCODE_ADDR, 0);
2782
2783         /* CE */
2784         fw_data = (const __be32 *)rdev->ce_fw->data;
2785         WREG32(CP_CE_UCODE_ADDR, 0);
2786         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2787                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2788         WREG32(CP_CE_UCODE_ADDR, 0);
2789
2790         /* ME */
2791         fw_data = (const __be32 *)rdev->me_fw->data;
2792         WREG32(CP_ME_RAM_WADDR, 0);
2793         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2794                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2795         WREG32(CP_ME_RAM_WADDR, 0);
2796
2797         WREG32(CP_PFP_UCODE_ADDR, 0);
2798         WREG32(CP_CE_UCODE_ADDR, 0);
2799         WREG32(CP_ME_RAM_WADDR, 0);
2800         WREG32(CP_ME_RAM_RADDR, 0);
2801         return 0;
2802 }
2803
2804 static int si_cp_start(struct radeon_device *rdev)
2805 {
2806         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2807         int r, i;
2808
2809         r = radeon_ring_lock(rdev, ring, 7 + 4);
2810         if (r) {
2811                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2812                 return r;
2813         }
2814         /* init the CP */
2815         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2816         radeon_ring_write(ring, 0x1);
2817         radeon_ring_write(ring, 0x0);
2818         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
2819         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
2820         radeon_ring_write(ring, 0);
2821         radeon_ring_write(ring, 0);
2822
2823         /* init the CE partitions */
2824         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2825         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2826         radeon_ring_write(ring, 0xc000);
2827         radeon_ring_write(ring, 0xe000);
2828         radeon_ring_unlock_commit(rdev, ring);
2829
2830         si_cp_enable(rdev, true);
2831
2832         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
2833         if (r) {
2834                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2835                 return r;
2836         }
2837
2838         /* setup clear context state */
2839         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2840         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2841
2842         for (i = 0; i < si_default_size; i++)
2843                 radeon_ring_write(ring, si_default_state[i]);
2844
2845         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2846         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2847
2848         /* set clear context state */
2849         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2850         radeon_ring_write(ring, 0);
2851
2852         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2853         radeon_ring_write(ring, 0x00000316);
2854         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2855         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2856
2857         radeon_ring_unlock_commit(rdev, ring);
2858
2859         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
2860                 ring = &rdev->ring[i];
2861                 r = radeon_ring_lock(rdev, ring, 2);
2862
2863                 /* clear the compute context state */
2864                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
2865                 radeon_ring_write(ring, 0);
2866
2867                 radeon_ring_unlock_commit(rdev, ring);
2868         }
2869
2870         return 0;
2871 }
2872
2873 static void si_cp_fini(struct radeon_device *rdev)
2874 {
2875         struct radeon_ring *ring;
2876         si_cp_enable(rdev, false);
2877
2878         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2879         radeon_ring_fini(rdev, ring);
2880         radeon_scratch_free(rdev, ring->rptr_save_reg);
2881
2882         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2883         radeon_ring_fini(rdev, ring);
2884         radeon_scratch_free(rdev, ring->rptr_save_reg);
2885
2886         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2887         radeon_ring_fini(rdev, ring);
2888         radeon_scratch_free(rdev, ring->rptr_save_reg);
2889 }
2890
/**
 * si_cp_resume - reset the CP and bring up all three rings
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP (and the blocks that must be reset with it),
 * programs the ring buffer size, pointers and writeback addresses for
 * the gfx ring (RB0) and both compute rings (RB1/RB2), starts the CP
 * via si_cp_start() and ring-tests each ring.
 *
 * Returns 0 on success (compute ring test failures only mark that ring
 * not ready), or the gfx ring test error, in which case all rings are
 * marked not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);	/* drop RB_RPTR_WR_ENA again */

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring is required; fail and mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal; just mark the ring unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3026
/**
 * si_gpu_check_soft_reset - determine which GPU blocks are hung
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM status registers and builds a
 * RADEON_RESET_* bitmask of the blocks that report busy.  An MC-busy
 * indication is deliberately cleared from the mask, since a busy MC is
 * most likely just working, not hung.
 *
 * Returns the reset mask (0 if nothing appears hung).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3107
/**
 * si_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask (from si_gpu_check_soft_reset())
 *
 * Halts the CP and the DMA engines, stops the MC, translates the reset
 * mask into GRBM/SRBM soft-reset bits, pulses those reset bits, then
 * restores the MC.  A no-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* stop the MC before pulsing resets; resumed at the end */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* build the GRBM soft-reset bits for the gfx pipeline blocks */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, clear; the extra reads
	 * post the writes */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3232
3233 int si_asic_reset(struct radeon_device *rdev)
3234 {
3235         u32 reset_mask;
3236
3237         reset_mask = si_gpu_check_soft_reset(rdev);
3238
3239         if (reset_mask)
3240                 r600_set_bios_scratch_engine_hung(rdev, true);
3241
3242         si_gpu_soft_reset(rdev, reset_mask);
3243
3244         reset_mask = si_gpu_check_soft_reset(rdev);
3245
3246         if (!reset_mask)
3247                 r600_set_bios_scratch_engine_hung(rdev, false);
3248
3249         return 0;
3250 }
3251
3252 /**
3253  * si_gfx_is_lockup - Check if the GFX engine is locked up
3254  *
3255  * @rdev: radeon_device pointer
3256  * @ring: radeon_ring structure holding ring information
3257  *
3258  * Check if the GFX engine is locked up.
3259  * Returns true if the engine appears to be locked up, false if not.
3260  */
3261 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3262 {
3263         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3264
3265         if (!(reset_mask & (RADEON_RESET_GFX |
3266                             RADEON_RESET_COMPUTE |
3267                             RADEON_RESET_CP))) {
3268                 radeon_ring_lockup_update(ring);
3269                 return false;
3270         }
3271         /* force CP activities */
3272         radeon_ring_force_activity(rdev, ring);
3273         return radeon_ring_test_lockup(rdev, ring);
3274 }
3275
3276 /**
3277  * si_dma_is_lockup - Check if the DMA engine is locked up
3278  *
3279  * @rdev: radeon_device pointer
3280  * @ring: radeon_ring structure holding ring information
3281  *
3282  * Check if the async DMA engine is locked up.
3283  * Returns true if the engine appears to be locked up, false if not.
3284  */
3285 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3286 {
3287         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3288         u32 mask;
3289
3290         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3291                 mask = RADEON_RESET_DMA;
3292         else
3293                 mask = RADEON_RESET_DMA1;
3294
3295         if (!(reset_mask & mask)) {
3296                 radeon_ring_lockup_update(ring);
3297                 return false;
3298         }
3299         /* force ring activities */
3300         radeon_ring_force_activity(rdev, ring);
3301         return radeon_ring_test_lockup(rdev, ring);
3302 }
3303
3304 /* MC */
/**
 * si_mc_program - program the memory controller aperture registers
 * @rdev: radeon_device pointer
 *
 * Clears the HDP registers, stops the MC, programs the system and FB
 * aperture locations from rdev->mc, disables the AGP aperture, then
 * resumes the MC and (on ASICs with display) disables the VGA renderer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end (top 16 bits) | start (bottom 16 bits), in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP means empty) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3355
3356 static void si_vram_gtt_location(struct radeon_device *rdev,
3357                                  struct radeon_mc *mc)
3358 {
3359         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3360                 /* leave room for at least 1024M GTT */
3361                 dev_warn(rdev->dev, "limiting VRAM\n");
3362                 mc->real_vram_size = 0xFFC0000000ULL;
3363                 mc->mc_vram_size = 0xFFC0000000ULL;
3364         }
3365         radeon_vram_location(rdev, &rdev->mc, 0);
3366         rdev->mc.gtt_base_align = 0;
3367         radeon_gtt_location(rdev, mc);
3368 }
3369
3370 static int si_mc_init(struct radeon_device *rdev)
3371 {
3372         u32 tmp;
3373         int chansize, numchan;
3374
3375         /* Get VRAM informations */
3376         rdev->mc.vram_is_ddr = true;
3377         tmp = RREG32(MC_ARB_RAMCFG);
3378         if (tmp & CHANSIZE_OVERRIDE) {
3379                 chansize = 16;
3380         } else if (tmp & CHANSIZE_MASK) {
3381                 chansize = 64;
3382         } else {
3383                 chansize = 32;
3384         }
3385         tmp = RREG32(MC_SHARED_CHMAP);
3386         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3387         case 0:
3388         default:
3389                 numchan = 1;
3390                 break;
3391         case 1:
3392                 numchan = 2;
3393                 break;
3394         case 2:
3395                 numchan = 4;
3396                 break;
3397         case 3:
3398                 numchan = 8;
3399                 break;
3400         case 4:
3401                 numchan = 3;
3402                 break;
3403         case 5:
3404                 numchan = 6;
3405                 break;
3406         case 6:
3407                 numchan = 10;
3408                 break;
3409         case 7:
3410                 numchan = 12;
3411                 break;
3412         case 8:
3413                 numchan = 16;
3414                 break;
3415         }
3416         rdev->mc.vram_width = numchan * chansize;
3417         /* Could aper size report 0 ? */
3418         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3419         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3420         /* size in MB on si */
3421         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3422         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3423         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3424         si_vram_gtt_location(rdev, &rdev->mc);
3425         radeon_update_bandwidth_info(rdev);
3426
3427         return 0;
3428 }
3429
3430 /*
3431  * GART
3432  */
/**
 * si_pcie_gart_tlb_flush - flush the VM TLBs
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then requests an invalidate of VM context 0's
 * TLB.  The write order matters: HDP must be flushed before the TLB
 * invalidate so stale page-table data is not re-fetched.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        /* flush hdp cache */
        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

        /* bits 0-15 are the VM contexts0-15; this invalidates context 0 */
        WREG32(VM_INVALIDATE_REQUEST, 1);
}
3441
/**
 * si_pcie_gart_enable - set up the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the TLB and L2 cache
 * control registers, sets up VM context 0 for the GART range and
 * contexts 1-15 for per-process VMs, then flushes the TLBs.
 *
 * Returns 0 on success, negative error code if the page table object
 * is missing or cannot be pinned.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
        int r, i;

        if (rdev->gart.robj == NULL) {
                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
                return -EINVAL;
        }
        r = radeon_gart_table_vram_pin(rdev);
        if (r)
                return r;
        /* re-write any existing GART entries into the pinned table */
        radeon_gart_restore(rdev);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL,
               (0xA << 7) |
               ENABLE_L1_TLB |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               ENABLE_ADVANCED_DRIVER_MODEL |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(0));
        /* setup context0: covers the GART aperture, flat (depth 0)
         * page table, faults redirected to the dummy page */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT0_CNTL2, 0);
        WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

        /* NOTE(review): undocumented registers, cleared here on purpose —
         * meaning not derivable from this file */
        WREG32(0x15D4, 0);
        WREG32(0x15D8, 0);
        WREG32(0x15DC, 0);

        /* empty context1-15 */
        /* set vm size, must be a multiple of 4 */
        WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
        WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
        /* Assign the pt base to something valid for now; the pts used for
         * the VMs are determined by the application and setup and assigned
         * on the fly in the vm part of radeon_gart.c
         */
        for (i = 1; i < 16; i++) {
                /* contexts 0-7 and 8-15 live in two separate register banks */
                if (i < 8)
                        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
                               rdev->gart.table_addr >> 12);
                else
                        WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
                               rdev->gart.table_addr >> 12);
        }

        /* enable context1-15: two-level page tables (depth 1), all fault
         * classes raise interrupts and fall back to the dummy page */
        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

        si_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
                 (unsigned)(rdev->mc.gtt_size >> 20),
                 (unsigned long long)rdev->gart.table_addr);
        rdev->gart.ready = true;
        return 0;
}
3526
/**
 * si_pcie_gart_disable - tear down the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the TLB and L2 registers without
 * their enable bits (ENABLE_L1_TLB / ENABLE_L2_CACHE are intentionally
 * omitted), and unpins the page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
        /* Disable all tables */
        WREG32(VM_CONTEXT0_CNTL, 0);
        WREG32(VM_CONTEXT1_CNTL, 0);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, 0);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(0));
        radeon_gart_table_vram_unpin(rdev);
}
3545
/**
 * si_pcie_gart_fini - final GART teardown
 * @rdev: radeon_device pointer
 *
 * Order matters: the hardware must stop using the table (disable)
 * before the table memory is freed and the GART state is torn down.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
        si_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
3552
3553 /* vm parser */
3554 static bool si_vm_reg_valid(u32 reg)
3555 {
3556         /* context regs are fine */
3557         if (reg >= 0x28000)
3558                 return true;
3559
3560         /* check config regs */
3561         switch (reg) {
3562         case GRBM_GFX_INDEX:
3563         case CP_STRMOUT_CNTL:
3564         case VGT_VTX_VECT_EJECT_REG:
3565         case VGT_CACHE_INVALIDATION:
3566         case VGT_ESGS_RING_SIZE:
3567         case VGT_GSVS_RING_SIZE:
3568         case VGT_GS_VERTEX_REUSE:
3569         case VGT_PRIMITIVE_TYPE:
3570         case VGT_INDEX_TYPE:
3571         case VGT_NUM_INDICES:
3572         case VGT_NUM_INSTANCES:
3573         case VGT_TF_RING_SIZE:
3574         case VGT_HS_OFFCHIP_PARAM:
3575         case VGT_TF_MEMORY_BASE:
3576         case PA_CL_ENHANCE:
3577         case PA_SU_LINE_STIPPLE_VALUE:
3578         case PA_SC_LINE_STIPPLE_STATE:
3579         case PA_SC_ENHANCE:
3580         case SQC_CACHES:
3581         case SPI_STATIC_THREAD_MGMT_1:
3582         case SPI_STATIC_THREAD_MGMT_2:
3583         case SPI_STATIC_THREAD_MGMT_3:
3584         case SPI_PS_MAX_WAVE_ID:
3585         case SPI_CONFIG_CNTL:
3586         case SPI_CONFIG_CNTL_1:
3587         case TA_CNTL_AUX:
3588                 return true;
3589         default:
3590                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3591                 return false;
3592         }
3593 }
3594
3595 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3596                                   u32 *ib, struct radeon_cs_packet *pkt)
3597 {
3598         switch (pkt->opcode) {
3599         case PACKET3_NOP:
3600         case PACKET3_SET_BASE:
3601         case PACKET3_SET_CE_DE_COUNTERS:
3602         case PACKET3_LOAD_CONST_RAM:
3603         case PACKET3_WRITE_CONST_RAM:
3604         case PACKET3_WRITE_CONST_RAM_OFFSET:
3605         case PACKET3_DUMP_CONST_RAM:
3606         case PACKET3_INCREMENT_CE_COUNTER:
3607         case PACKET3_WAIT_ON_DE_COUNTER:
3608         case PACKET3_CE_WRITE:
3609                 break;
3610         default:
3611                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3612                 return -EINVAL;
3613         }
3614         return 0;
3615 }
3616
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 for the GFX ring
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header (idx points at the header dword)
 *
 * Most opcodes are allowed outright; packets that can write registers
 * (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA)
 * have their destination registers checked against si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
                                   u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the header */
        u32 idx_value = ib[idx];
        u32 start_reg, end_reg, reg, i;
        u32 command, info;

        switch (pkt->opcode) {
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_INDEX_BUFFER_SIZE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_DRAW_INDIRECT:
        case PACKET3_DRAW_INDEX_INDIRECT:
        case PACKET3_INDEX_BASE:
        case PACKET3_DRAW_INDEX_2:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_INDEX_TYPE:
        case PACKET3_DRAW_INDIRECT_MULTI:
        case PACKET3_DRAW_INDEX_AUTO:
        case PACKET3_DRAW_INDEX_IMMD:
        case PACKET3_NUM_INSTANCES:
        case PACKET3_DRAW_INDEX_MULTI_AUTO:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_DRAW_INDEX_OFFSET_2:
        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
        case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
        case PACKET3_MPEG_INDEX:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                /* always allowed */
                break;
        case PACKET3_COPY_DATA:
                /* dest-select field zero -> destination is a register */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;  /* dword offset -> byte offset */
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                /* dest-select field zero -> destination is a register range */
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* one-register-write mode: same reg repeatedly */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* sequential writes: check the whole range */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* write-space bit set -> write goes to a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* dst-space bit set -> destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_SET_CONFIG_REG:
                /* range-check the whole run of config registers */
                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
                end_reg = 4 * pkt->count + start_reg - 4;
                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
                        return -EINVAL;
                }
                for (i = 0; i < pkt->count; i++) {
                        reg = start_reg + (4 * i);
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_CP_DMA:
                command = ib[idx + 4];
                info = ib[idx + 1];
                if (command & PACKET3_CP_DMA_CMD_SAS) {
                        /* src address space is register */
                        if (((info & 0x60000000) >> 29) == 0) {
                                start_reg = idx_value << 2;
                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
                                        /* no-increment: single src register */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad SRC register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        /* low 21 bits of command = byte count */
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad SRC register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                if (command & PACKET3_CP_DMA_CMD_DAS) {
                        /* dst address space is register */
                        if (((info & 0x00300000) >> 20) == 0) {
                                start_reg = ib[idx + 2];
                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
                                        /* no-increment: single dst register */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad DST register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad DST register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                break;
        default:
                DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
3775
/**
 * si_vm_packet3_compute_check - validate a PACKET3 for the compute rings
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: parsed packet header (idx points at the header dword)
 *
 * Same scheme as the GFX checker but with a smaller whitelist (no draw
 * packets, no SET_CONFIG_REG, no CP_DMA): register-writing packets have
 * their destinations checked against si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
                                       u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the header */
        u32 idx_value = ib[idx];
        u32 start_reg, reg, i;

        switch (pkt->opcode) {
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                /* always allowed */
                break;
        case PACKET3_COPY_DATA:
                /* dest-select field zero -> destination is a register */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;  /* dword offset -> byte offset */
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                /* dest-select field zero -> destination is a register range */
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* one-register-write mode */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* sequential writes: check the whole range */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* write-space bit set -> write goes to a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* dst-space bit set -> destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
3857
3858 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3859 {
3860         int ret = 0;
3861         u32 idx = 0;
3862         struct radeon_cs_packet pkt;
3863
3864         do {
3865                 pkt.idx = idx;
3866                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3867                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3868                 pkt.one_reg_wr = 0;
3869                 switch (pkt.type) {
3870                 case RADEON_PACKET_TYPE0:
3871                         dev_err(rdev->dev, "Packet0 not allowed!\n");
3872                         ret = -EINVAL;
3873                         break;
3874                 case RADEON_PACKET_TYPE2:
3875                         idx += 1;
3876                         break;
3877                 case RADEON_PACKET_TYPE3:
3878                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3879                         if (ib->is_const_ib)
3880                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
3881                         else {
3882                                 switch (ib->ring) {
3883                                 case RADEON_RING_TYPE_GFX_INDEX:
3884                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
3885                                         break;
3886                                 case CAYMAN_RING_TYPE_CP1_INDEX:
3887                                 case CAYMAN_RING_TYPE_CP2_INDEX:
3888                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
3889                                         break;
3890                                 default:
3891                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
3892                                         ret = -EINVAL;
3893                                         break;
3894                                 }
3895                         }
3896                         idx += pkt.count + 2;
3897                         break;
3898                 default:
3899                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3900                         ret = -EINVAL;
3901                         break;
3902                 }
3903                 if (ret)
3904                         break;
3905         } while (idx < ib->length_dw);
3906
3907         return ret;
3908 }
3909
3910 /*
3911  * vm
3912  */
/**
 * si_vm_init - initialize the VM manager parameters
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 VM contexts; VRAM pages need no base offset.
 *
 * Returns 0 (always succeeds).
 */
int si_vm_init(struct radeon_device *rdev)
{
        /* number of VMs */
        rdev->vm_manager.nvm = 16;
        /* base offset of vram pages */
        rdev->vm_manager.vram_base_offset = 0;

        return 0;
}
3922
/**
 * si_vm_fini - VM manager teardown
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; kept so the asic vtable has a symmetric hook.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
3926
/**
 * si_vm_set_page - update the page tables using the CP or DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags (RADEON_VM_PAGE_*)
 *
 * Update the page tables using the CP (SI).  The engine is chosen from
 * rdev->asic->vm.pt_ring_index: the GFX ring uses PACKET3_WRITE_DATA,
 * the DMA ring uses DMA write / PTE_PDE packets.
 */
void si_vm_set_page(struct radeon_device *rdev,
                    struct radeon_ib *ib,
                    uint64_t pe,
                    uint64_t addr, unsigned count,
                    uint32_t incr, uint32_t flags)
{
        uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
        uint64_t value;
        unsigned ndw;

        if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
                /* CP path: WRITE_DATA packets, max 0x3FFE dwords each */
                while (count) {
                        ndw = 2 + count * 2;    /* 2 header dwords + 2 per PTE */
                        if (ndw > 0x3FFE)
                                ndw = 0x3FFE;

                        ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
                        ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
                                        WRITE_DATA_DST_SEL(1));
                        ib->ptr[ib->length_dw++] = pe;
                        ib->ptr[ib->length_dw++] = upper_32_bits(pe);
                        for (; ndw > 2; ndw -= 2, --count, pe += 8) {
                                if (flags & RADEON_VM_PAGE_SYSTEM) {
                                        /* system page: translate through the GART */
                                        value = radeon_vm_map_gart(rdev, addr);
                                        value &= 0xFFFFFFFFFFFFF000ULL;
                                } else if (flags & RADEON_VM_PAGE_VALID) {
                                        value = addr;
                                } else {
                                        value = 0;
                                }
                                addr += incr;
                                value |= r600_flags;
                                ib->ptr[ib->length_dw++] = value;
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                        }
                }
        } else {
                /* DMA */
                if (flags & RADEON_VM_PAGE_SYSTEM) {
                        while (count) {
                                ndw = count * 2;        /* 2 dwords per PTE */
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                /* for non-physically contiguous pages (system) */
                                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
                                ib->ptr[ib->length_dw++] = pe;
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                                        if (flags & RADEON_VM_PAGE_SYSTEM) {
                                                value = radeon_vm_map_gart(rdev, addr);
                                                value &= 0xFFFFFFFFFFFFF000ULL;
                                        } else if (flags & RADEON_VM_PAGE_VALID) {
                                                value = addr;
                                        } else {
                                                value = 0;
                                        }
                                        addr += incr;
                                        value |= r600_flags;
                                        ib->ptr[ib->length_dw++] = value;
                                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                }
                        }
                } else {
                        while (count) {
                                ndw = count * 2;
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                if (flags & RADEON_VM_PAGE_VALID)
                                        value = addr;
                                else
                                        value = 0;
                                /* for physically contiguous pages (vram):
                                 * the engine generates ndw/2 entries itself
                                 * from base value + increment */
                                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                ib->ptr[ib->length_dw++] = r600_flags; /* mask */
                                ib->ptr[ib->length_dw++] = 0;
                                ib->ptr[ib->length_dw++] = value; /* value */
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                ib->ptr[ib->length_dw++] = incr; /* increment size */
                                ib->ptr[ib->length_dw++] = 0;
                                pe += ndw * 4;
                                addr += (ndw / 2) * incr;
                                count -= ndw / 2;
                        }
                }
                /* pad the DMA IB to a multiple of 8 dwords with NOPs */
                while (ib->length_dw & 0x7)
                        ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
        }
}
4032
/**
 * si_vm_flush - flush a VM's TLB via the CP ring
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Emits WRITE_DATA packets that update the VM's page directory base,
 * flush the HDP cache, and invalidate the VM's TLB, then syncs the PFP
 * to the ME.  The emission order is required: base address first, HDP
 * flush, then invalidate.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* write new base address */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));

        /* contexts 0-7 and 8-15 live in two separate register banks */
        if (vm->id < 8) {
                radeon_ring_write(ring,
                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
        } else {
                radeon_ring_write(ring,
                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
        }
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0x1);

        /* bits 0-15 are the VM contexts0-15 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm->id);

        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
}
4075
/**
 * si_dma_vm_flush - emit a VM TLB flush on a DMA ring
 * @rdev: radeon device
 * @ridx: index into rdev->ring of the DMA ring to emit on
 * @vm: VM to flush for; a NULL vm is a no-op
 *
 * DMA-ring counterpart of si_vm_flush(): uses SRBM_WRITE packets
 * (byte-enable mask 0xf in bits 16-19) to program the page-table base,
 * flush the HDP cache, and invalidate the TLB for this VM id.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4101
4102 /*
4103  * RLC
4104  */
4105 void si_rlc_fini(struct radeon_device *rdev)
4106 {
4107         int r;
4108
4109         /* save restore block */
4110         if (rdev->rlc.save_restore_obj) {
4111                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4112                 if (unlikely(r != 0))
4113                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4114                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4115                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4116
4117                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4118                 rdev->rlc.save_restore_obj = NULL;
4119         }
4120
4121         /* clear state block */
4122         if (rdev->rlc.clear_state_obj) {
4123                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4124                 if (unlikely(r != 0))
4125                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4126                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4127                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4128
4129                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4130                 rdev->rlc.clear_state_obj = NULL;
4131         }
4132 }
4133
4134 int si_rlc_init(struct radeon_device *rdev)
4135 {
4136         int r;
4137
4138         /* save restore block */
4139         if (rdev->rlc.save_restore_obj == NULL) {
4140                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4141                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4142                                      &rdev->rlc.save_restore_obj);
4143                 if (r) {
4144                         dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
4145                         return r;
4146                 }
4147         }
4148
4149         r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4150         if (unlikely(r != 0)) {
4151                 si_rlc_fini(rdev);
4152                 return r;
4153         }
4154         r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
4155                           &rdev->rlc.save_restore_gpu_addr);
4156         radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4157         if (r) {
4158                 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
4159                 si_rlc_fini(rdev);
4160                 return r;
4161         }
4162
4163         /* clear state block */
4164         if (rdev->rlc.clear_state_obj == NULL) {
4165                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4166                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4167                                      &rdev->rlc.clear_state_obj);
4168                 if (r) {
4169                         dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
4170                         si_rlc_fini(rdev);
4171                         return r;
4172                 }
4173         }
4174         r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4175         if (unlikely(r != 0)) {
4176                 si_rlc_fini(rdev);
4177                 return r;
4178         }
4179         r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
4180                           &rdev->rlc.clear_state_gpu_addr);
4181         radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4182         if (r) {
4183                 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
4184                 si_rlc_fini(rdev);
4185                 return r;
4186         }
4187
4188         return 0;
4189 }
4190
/* Halt the RLC by clearing its control register (drops RLC_ENABLE). */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4195
/* Start the RLC by setting the enable bit in its control register. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4200
/**
 * si_rlc_resume - reset the RLC and reload its microcode
 * @rdev: radeon device
 *
 * Stops the RLC, clears its run-list and load-balancing state,
 * programs the save/restore and clear-state buffer GPU addresses,
 * uploads the RLC microcode one dword at a time (firmware words are
 * big-endian), then re-enables the RLC.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was never loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	/* reset run-list and load-balancing state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	/* buffer addresses are 256-byte aligned, hence the >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the microcode via the address/data register pair */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	/* reset the ucode write address */
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4234
/* Enable the interrupt handler: set the master interrupt enable and
 * the IH ring buffer enable, and mirror the state in rdev->ih. */
static void si_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
4246
/* Disable the interrupt handler: clear the IH ring buffer and master
 * interrupt enables, zero the ring pointers, and mirror the state in
 * rdev->ih. */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
4262
/**
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon device
 *
 * Writes "disabled" values into all interrupt enable registers: the
 * three CP ring interrupt controls, both DMA engine trap enables,
 * GRBM, the per-crtc vblank and pageflip masks (for however many crtcs
 * the ASIC has), and - on ASICs with display hardware - the DAC
 * autodetect and HPD pin interrupt controls.  HPD polarity bits are
 * preserved while the enable bits are cleared.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* disable DMA traps on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* per-crtc vblank/vline masks */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* per-crtc pageflip interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep the HPD polarity bit, clear everything else */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4318
/**
 * si_irq_init - set up the interrupt handler ring
 * @rdev: radeon device
 *
 * Allocates the IH ring buffer, loads the RLC microcode, programs the
 * interrupt controller (ring base, size, writeback address, dummy-read
 * behavior), forces all interrupt sources off, enables bus mastering,
 * and finally enables the IH.
 *
 * Returns 0 on success or a negative error code (ring allocation or
 * RLC resume failure; the IH ring is freed again on RLC failure).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4389
/**
 * si_irq_set - program the hardware interrupt masks from driver state
 * @rdev: radeon device
 *
 * Builds the enable masks for the three CP rings, the two DMA engine
 * traps, the per-crtc vblank interrupts and the hot-plug-detect pins
 * from the rdev->irq bookkeeping, then writes them all out.  If the IH
 * is disabled, every source is masked instead.
 *
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current HPD state with the enable bits cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	/* current DMA trap state with the enable bits cleared */
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank is requested either by the vblank interrupt users or by a
	 * pending page flip on that crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	/* hot-plug detect pins */
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write the accumulated masks out to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
4547
4548 static inline void si_irq_ack(struct radeon_device *rdev)
4549 {
4550         u32 tmp;
4551
4552         if (ASIC_IS_NODCE(rdev))
4553                 return;
4554
4555         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4556         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4557         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4558         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4559         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4560         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4561         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4562         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4563         if (rdev->num_crtc >= 4) {
4564                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4565                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4566         }
4567         if (rdev->num_crtc >= 6) {
4568                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4569                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4570         }
4571
4572         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4573                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4574         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4575                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4576         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4577                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4578         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4579                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4580         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4581                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4582         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4583                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4584
4585         if (rdev->num_crtc >= 4) {
4586                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4587                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4588                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4589                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4590                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4591                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4592                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4593                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4594                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4595                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4596                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4597                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4598         }
4599
4600         if (rdev->num_crtc >= 6) {
4601                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4602                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4603                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4604                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4605                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4606                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4607                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4608                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4609                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4610                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4611                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4612                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4613         }
4614
4615         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4616                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4617                 tmp |= DC_HPDx_INT_ACK;
4618                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4619         }
4620         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4621                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4622                 tmp |= DC_HPDx_INT_ACK;
4623                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4624         }
4625         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4626                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4627                 tmp |= DC_HPDx_INT_ACK;
4628                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4629         }
4630         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4631                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4632                 tmp |= DC_HPDx_INT_ACK;
4633                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4634         }
4635         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4636                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4637                 tmp |= DC_HPDx_INT_ACK;
4638                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4639         }
4640         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4641                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4642                 tmp |= DC_HPDx_INT_ACK;
4643                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4644         }
4645 }
4646
/* Fully quiesce interrupts: disable the IH, give in-flight interrupts
 * a moment to land, acknowledge anything pending, then mask all
 * sources. */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4655
/* Suspend interrupt processing: disable all interrupts and halt the
 * RLC. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4661
/* Tear down interrupt handling: suspend the hardware side, then free
 * the IH ring buffer. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4667
4668 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4669 {
4670         u32 wptr, tmp;
4671
4672         if (rdev->wb.enabled)
4673                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4674         else
4675                 wptr = RREG32(IH_RB_WPTR);
4676
4677         if (wptr & RB_OVERFLOW) {
4678                 /* When a ring buffer overflow happen start parsing interrupt
4679                  * from the last not overwritten vector (wptr + 16). Hopefully
4680                  * this should allow us to catchup.
4681                  */
4682                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4683                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4684                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4685                 tmp = RREG32(IH_RB_CNTL);
4686                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4687                 WREG32(IH_RB_CNTL, tmp);
4688         }
4689         return (wptr & rdev->ih.ptr_mask);
4690 }
4691
4692 /*        SI IV Ring
4693  * Each IV ring entry is 128 bits:
4694  * [7:0]    - interrupt source id
4695  * [31:8]   - reserved
4696  * [59:32]  - interrupt source data
4697  * [63:60]  - reserved
4698  * [71:64]  - RINGID
4699  * [79:72]  - VMID
4700  * [127:80] - reserved
4701  */
4702 int si_irq_process(struct radeon_device *rdev)
4703 {
4704         u32 wptr;
4705         u32 rptr;
4706         u32 src_id, src_data, ring_id;
4707         u32 ring_index;
4708         bool queue_hotplug = false;
4709
4710         if (!rdev->ih.enabled || rdev->shutdown)
4711                 return IRQ_NONE;
4712
4713         wptr = si_get_ih_wptr(rdev);
4714
4715 restart_ih:
4716         /* is somebody else already processing irqs? */
4717         if (atomic_xchg(&rdev->ih.lock, 1))
4718                 return IRQ_NONE;
4719
4720         rptr = rdev->ih.rptr;
4721         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4722
4723         /* Order reading of wptr vs. reading of IH ring data */
4724         rmb();
4725
4726         /* display interrupts */
4727         si_irq_ack(rdev);
4728
4729         while (rptr != wptr) {
4730                 /* wptr/rptr are in bytes! */
4731                 ring_index = rptr / 4;
4732                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4733                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4734                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4735
4736                 switch (src_id) {
4737                 case 1: /* D1 vblank/vline */
4738                         switch (src_data) {
4739                         case 0: /* D1 vblank */
4740                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
4741                                         if (rdev->irq.crtc_vblank_int[0]) {
4742                                                 drm_handle_vblank(rdev->ddev, 0);
4743                                                 rdev->pm.vblank_sync = true;
4744                                                 wake_up(&rdev->irq.vblank_queue);
4745                                         }
4746                                         if (atomic_read(&rdev->irq.pflip[0]))
4747                                                 radeon_crtc_handle_flip(rdev, 0);
4748                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4749                                         DRM_DEBUG("IH: D1 vblank\n");
4750                                 }
4751                                 break;
4752                         case 1: /* D1 vline */
4753                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
4754                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4755                                         DRM_DEBUG("IH: D1 vline\n");
4756                                 }
4757                                 break;
4758                         default:
4759                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4760                                 break;
4761                         }
4762                         break;
4763                 case 2: /* D2 vblank/vline */
4764                         switch (src_data) {
4765                         case 0: /* D2 vblank */
4766                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4767                                         if (rdev->irq.crtc_vblank_int[1]) {
4768                                                 drm_handle_vblank(rdev->ddev, 1);
4769                                                 rdev->pm.vblank_sync = true;
4770                                                 wake_up(&rdev->irq.vblank_queue);
4771                                         }
4772                                         if (atomic_read(&rdev->irq.pflip[1]))
4773                                                 radeon_crtc_handle_flip(rdev, 1);
4774                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4775                                         DRM_DEBUG("IH: D2 vblank\n");
4776                                 }
4777                                 break;
4778                         case 1: /* D2 vline */
4779                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4780                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4781                                         DRM_DEBUG("IH: D2 vline\n");
4782                                 }
4783                                 break;
4784                         default:
4785                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4786                                 break;
4787                         }
4788                         break;
4789                 case 3: /* D3 vblank/vline */
4790                         switch (src_data) {
4791                         case 0: /* D3 vblank */
4792                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4793                                         if (rdev->irq.crtc_vblank_int[2]) {
4794                                                 drm_handle_vblank(rdev->ddev, 2);
4795                                                 rdev->pm.vblank_sync = true;
4796                                                 wake_up(&rdev->irq.vblank_queue);
4797                                         }
4798                                         if (atomic_read(&rdev->irq.pflip[2]))
4799                                                 radeon_crtc_handle_flip(rdev, 2);
4800                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4801                                         DRM_DEBUG("IH: D3 vblank\n");
4802                                 }
4803                                 break;
4804                         case 1: /* D3 vline */
4805                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4806                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4807                                         DRM_DEBUG("IH: D3 vline\n");
4808                                 }
4809                                 break;
4810                         default:
4811                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4812                                 break;
4813                         }
4814                         break;
4815                 case 4: /* D4 vblank/vline */
4816                         switch (src_data) {
4817                         case 0: /* D4 vblank */
4818                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4819                                         if (rdev->irq.crtc_vblank_int[3]) {
4820                                                 drm_handle_vblank(rdev->ddev, 3);
4821                                                 rdev->pm.vblank_sync = true;
4822                                                 wake_up(&rdev->irq.vblank_queue);
4823                                         }
4824                                         if (atomic_read(&rdev->irq.pflip[3]))
4825                                                 radeon_crtc_handle_flip(rdev, 3);
4826                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4827                                         DRM_DEBUG("IH: D4 vblank\n");
4828                                 }
4829                                 break;
4830                         case 1: /* D4 vline */
4831                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4832                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4833                                         DRM_DEBUG("IH: D4 vline\n");
4834                                 }
4835                                 break;
4836                         default:
4837                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4838                                 break;
4839                         }
4840                         break;
4841                 case 5: /* D5 vblank/vline */
4842                         switch (src_data) {
4843                         case 0: /* D5 vblank */
4844                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4845                                         if (rdev->irq.crtc_vblank_int[4]) {
4846                                                 drm_handle_vblank(rdev->ddev, 4);
4847                                                 rdev->pm.vblank_sync = true;
4848                                                 wake_up(&rdev->irq.vblank_queue);
4849                                         }
4850                                         if (atomic_read(&rdev->irq.pflip[4]))
4851                                                 radeon_crtc_handle_flip(rdev, 4);
4852                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4853                                         DRM_DEBUG("IH: D5 vblank\n");
4854                                 }
4855                                 break;
4856                         case 1: /* D5 vline */
4857                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4858                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4859                                         DRM_DEBUG("IH: D5 vline\n");
4860                                 }
4861                                 break;
4862                         default:
4863                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4864                                 break;
4865                         }
4866                         break;
4867                 case 6: /* D6 vblank/vline */
4868                         switch (src_data) {
4869                         case 0: /* D6 vblank */
4870                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4871                                         if (rdev->irq.crtc_vblank_int[5]) {
4872                                                 drm_handle_vblank(rdev->ddev, 5);
4873                                                 rdev->pm.vblank_sync = true;
4874                                                 wake_up(&rdev->irq.vblank_queue);
4875                                         }
4876                                         if (atomic_read(&rdev->irq.pflip[5]))
4877                                                 radeon_crtc_handle_flip(rdev, 5);
4878                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4879                                         DRM_DEBUG("IH: D6 vblank\n");
4880                                 }
4881                                 break;
4882                         case 1: /* D6 vline */
4883                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4884                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4885                                         DRM_DEBUG("IH: D6 vline\n");
4886                                 }
4887                                 break;
4888                         default:
4889                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4890                                 break;
4891                         }
4892                         break;
4893                 case 42: /* HPD hotplug */
4894                         switch (src_data) {
4895                         case 0:
4896                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4897                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
4898                                         queue_hotplug = true;
4899                                         DRM_DEBUG("IH: HPD1\n");
4900                                 }
4901                                 break;
4902                         case 1:
4903                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4904                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4905                                         queue_hotplug = true;
4906                                         DRM_DEBUG("IH: HPD2\n");
4907                                 }
4908                                 break;
4909                         case 2:
4910                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4911                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4912                                         queue_hotplug = true;
4913                                         DRM_DEBUG("IH: HPD3\n");
4914                                 }
4915                                 break;
4916                         case 3:
4917                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4918                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4919                                         queue_hotplug = true;
4920                                         DRM_DEBUG("IH: HPD4\n");
4921                                 }
4922                                 break;
4923                         case 4:
4924                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4925                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4926                                         queue_hotplug = true;
4927                                         DRM_DEBUG("IH: HPD5\n");
4928                                 }
4929                                 break;
4930                         case 5:
4931                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4932                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4933                                         queue_hotplug = true;
4934                                         DRM_DEBUG("IH: HPD6\n");
4935                                 }
4936                                 break;
4937                         default:
4938                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4939                                 break;
4940                         }
4941                         break;
4942                 case 146:
4943                 case 147:
4944                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4945                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4946                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4947                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4948                                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4949                         /* reset addr and status */
4950                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4951                         break;
4952                 case 176: /* RINGID0 CP_INT */
4953                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4954                         break;
4955                 case 177: /* RINGID1 CP_INT */
4956                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4957                         break;
4958                 case 178: /* RINGID2 CP_INT */
4959                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4960                         break;
4961                 case 181: /* CP EOP event */
4962                         DRM_DEBUG("IH: CP EOP\n");
4963                         switch (ring_id) {
4964                         case 0:
4965                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4966                                 break;
4967                         case 1:
4968                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4969                                 break;
4970                         case 2:
4971                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4972                                 break;
4973                         }
4974                         break;
4975                 case 224: /* DMA trap event */
4976                         DRM_DEBUG("IH: DMA trap\n");
4977                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4978                         break;
4979                 case 233: /* GUI IDLE */
4980                         DRM_DEBUG("IH: GUI idle\n");
4981                         break;
4982                 case 244: /* DMA trap event */
4983                         DRM_DEBUG("IH: DMA1 trap\n");
4984                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4985                         break;
4986                 default:
4987                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4988                         break;
4989                 }
4990
4991                 /* wptr/rptr are in bytes! */
4992                 rptr += 16;
4993                 rptr &= rdev->ih.ptr_mask;
4994         }
4995         if (queue_hotplug)
4996                 schedule_work(&rdev->hotplug_work);
4997         rdev->ih.rptr = rptr;
4998         WREG32(IH_RB_RPTR, rdev->ih.rptr);
4999         atomic_set(&rdev->ih.lock, 0);
5000
5001         /* make sure wptr hasn't changed while processing */
5002         wptr = si_get_ih_wptr(rdev);
5003         if (wptr != rptr)
5004                 goto restart_ih;
5005
5006         return IRQ_HANDLED;
5007 }
5008
5009 /**
5010  * si_copy_dma - copy pages using the DMA engine
5011  *
5012  * @rdev: radeon_device pointer
5013  * @src_offset: src GPU address
5014  * @dst_offset: dst GPU address
5015  * @num_gpu_pages: number of GPU pages to xfer
5016  * @fence: radeon fence object
5017  *
5018  * Copy GPU paging using the DMA engine (SI).
5019  * Used by the radeon ttm implementation to move pages if
5020  * registered as the asic copy callback.
5021  */
5022 int si_copy_dma(struct radeon_device *rdev,
5023                 uint64_t src_offset, uint64_t dst_offset,
5024                 unsigned num_gpu_pages,
5025                 struct radeon_fence **fence)
5026 {
5027         struct radeon_semaphore *sem = NULL;
5028         int ring_index = rdev->asic->copy.dma_ring_index;
5029         struct radeon_ring *ring = &rdev->ring[ring_index];
5030         u32 size_in_bytes, cur_size_in_bytes;
5031         int i, num_loops;
5032         int r = 0;
5033
5034         r = radeon_semaphore_create(rdev, &sem);
5035         if (r) {
5036                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5037                 return r;
5038         }
5039
5040         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5041         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5042         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5043         if (r) {
5044                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5045                 radeon_semaphore_free(rdev, &sem, NULL);
5046                 return r;
5047         }
5048
5049         if (radeon_fence_need_sync(*fence, ring->idx)) {
5050                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5051                                             ring->idx);
5052                 radeon_fence_note_sync(*fence, ring->idx);
5053         } else {
5054                 radeon_semaphore_free(rdev, &sem, NULL);
5055         }
5056
5057         for (i = 0; i < num_loops; i++) {
5058                 cur_size_in_bytes = size_in_bytes;
5059                 if (cur_size_in_bytes > 0xFFFFF)
5060                         cur_size_in_bytes = 0xFFFFF;
5061                 size_in_bytes -= cur_size_in_bytes;
5062                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5063                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5064                 radeon_ring_write(ring, src_offset & 0xffffffff);
5065                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5066                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5067                 src_offset += cur_size_in_bytes;
5068                 dst_offset += cur_size_in_bytes;
5069         }
5070
5071         r = radeon_fence_emit(rdev, fence, ring->idx);
5072         if (r) {
5073                 radeon_ring_unlock_undo(rdev, ring);
5074                 return r;
5075         }
5076
5077         radeon_ring_unlock_commit(rdev, ring);
5078         radeon_semaphore_free(rdev, &sem, *fence);
5079
5080         return r;
5081 }
5082
5083 /*
5084  * startup/shutdown callbacks
5085  */
/*
 * si_startup - bring the SI GPU to a fully working state.
 *
 * Performs the runtime part of initialization: microcode load, MC/GART
 * programming, RLC and write-back buffers, per-ring fence processing,
 * interrupts, CP/DMA/UVD ring setup, IB pool and VM manager.  The
 * ordering below matters (e.g. MC ucode before si_mc_program(), IRQs
 * before the rings are started); do not reorder the calls.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* load CP (pfp/me/ce), RLC and MC firmware if not already cached */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	/* scratch buffer in VRAM, used by later ring/IB tests */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing for each ring: 3 CP rings, 2 DMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure just disable its ring
	 * (ring_size = 0) instead of failing the whole startup */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the five hw rings: gfx CP, two compute CPs, two DMAs */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only if rv770_uvd_resume() above succeeded */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
5251
/*
 * si_resume - re-initialize the GPU after a suspend cycle.
 * Returns 0 on success, negative error code on failure.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset the GPU before posting; re-posting the card via
	 * the vbios tables performs the tasks needed to bring the GPU
	 * back into a good state.  (NOTE(review): comment inherited from
	 * the rv770 code — "unlike on r500 hw" — presumed to hold for SI
	 * as well.)
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
5277
/*
 * si_suspend - quiesce the hw before suspend or driver teardown.
 *
 * Stops command submission (VM manager, CP, DMA, UVD), then disables
 * interrupts, write-back and finally the GART.  The order is the
 * reverse of si_startup(); do not reorder.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_rbc_stop(rdev);
	radeon_uvd_suspend(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5290
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This
 * should also allow removing a bunch of callback functions
 * like vram_info.
 */
5297 int si_init(struct radeon_device *rdev)
5298 {
5299         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5300         int r;
5301
5302         /* Read BIOS */
5303         if (!radeon_get_bios(rdev)) {
5304                 if (ASIC_IS_AVIVO(rdev))
5305                         return -EINVAL;
5306         }
5307         /* Must be an ATOMBIOS */
5308         if (!rdev->is_atom_bios) {
5309                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5310                 return -EINVAL;
5311         }
5312         r = radeon_atombios_init(rdev);
5313         if (r)
5314                 return r;
5315
5316         /* Post card if necessary */
5317         if (!radeon_card_posted(rdev)) {
5318                 if (!rdev->bios) {
5319                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5320                         return -EINVAL;
5321                 }
5322                 DRM_INFO("GPU not posted. posting now...\n");
5323                 atom_asic_init(rdev->mode_info.atom_context);
5324         }
5325         /* init golden registers */
5326         si_init_golden_registers(rdev);
5327         /* Initialize scratch registers */
5328         si_scratch_init(rdev);
5329         /* Initialize surface registers */
5330         radeon_surface_init(rdev);
5331         /* Initialize clocks */
5332         radeon_get_clock_info(rdev->ddev);
5333
5334         /* Fence driver */
5335         r = radeon_fence_driver_init(rdev);
5336         if (r)
5337                 return r;
5338
5339         /* initialize memory controller */
5340         r = si_mc_init(rdev);
5341         if (r)
5342                 return r;
5343         /* Memory manager */
5344         r = radeon_bo_init(rdev);
5345         if (r)
5346                 return r;
5347
5348         r = radeon_irq_kms_init(rdev);
5349         if (r)
5350                 return r;
5351
5352         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5353         ring->ring_obj = NULL;
5354         r600_ring_init(rdev, ring, 1024 * 1024);
5355
5356         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5357         ring->ring_obj = NULL;
5358         r600_ring_init(rdev, ring, 1024 * 1024);
5359
5360         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5361         ring->ring_obj = NULL;
5362         r600_ring_init(rdev, ring, 1024 * 1024);
5363
5364         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5365         ring->ring_obj = NULL;
5366         r600_ring_init(rdev, ring, 64 * 1024);
5367
5368         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5369         ring->ring_obj = NULL;
5370         r600_ring_init(rdev, ring, 64 * 1024);
5371
5372         r = radeon_uvd_init(rdev);
5373         if (!r) {
5374                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5375                 ring->ring_obj = NULL;
5376                 r600_ring_init(rdev, ring, 4096);
5377         }
5378
5379         rdev->ih.ring_obj = NULL;
5380         r600_ih_ring_init(rdev, 64 * 1024);
5381
5382         r = r600_pcie_gart_init(rdev);
5383         if (r)
5384                 return r;
5385
5386         rdev->accel_working = true;
5387         r = si_startup(rdev);
5388         if (r) {
5389                 dev_err(rdev->dev, "disabling GPU acceleration\n");
5390                 si_cp_fini(rdev);
5391                 cayman_dma_fini(rdev);
5392                 si_irq_fini(rdev);
5393                 si_rlc_fini(rdev);
5394                 radeon_wb_fini(rdev);
5395                 radeon_ib_pool_fini(rdev);
5396                 radeon_vm_manager_fini(rdev);
5397                 radeon_irq_kms_fini(rdev);
5398                 si_pcie_gart_fini(rdev);
5399                 rdev->accel_working = false;
5400         }
5401
5402         /* Don't start up if the MC ucode is missing.
5403          * The default clocks and voltages before the MC ucode
5404          * is loaded are not suffient for advanced operations.
5405          */
5406         if (!rdev->mc_fw) {
5407                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5408                 return -EINVAL;
5409         }
5410
5411         return 0;
5412 }
5413
/*
 * si_fini - final teardown on driver unload.
 *
 * Releases everything si_init()/si_startup() created, roughly in
 * reverse order of creation, then frees the cached BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_uvd_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* drop the now-dangling pointer so later code can't reuse it */
	rdev->bios = NULL;
}
5434
5435 /**
5436  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5437  *
5438  * @rdev: radeon_device pointer
5439  *
5440  * Fetches a GPU clock counter snapshot (SI).
5441  * Returns the 64 bit clock counter snapshot.
5442  */
5443 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5444 {
5445         uint64_t clock;
5446
5447         mutex_lock(&rdev->gpu_clock_mutex);
5448         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5449         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5450                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5451         mutex_unlock(&rdev->gpu_clock_mutex);
5452         return clock;
5453 }
5454
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock in 10 KHz units (0 to power down)
 * @dclk: requested UVD decode clock in 10 KHz units (0 to power down)
 *
 * Switches VCLK/DCLK to the bypass clock, reprograms the UPLL dividers
 * for the requested frequencies, waits for the PLL to lock, then
 * switches VCLK/DCLK back to the PLL outputs.  If either requested
 * clock is 0 the PLL is left in bypass and put to sleep instead.
 * The register write sequence and the mdelay() settle times follow
 * the hardware programming procedure and must not be reordered.
 *
 * Returns 0 on success, or a negative error code if no valid divider
 * combination exists or the PLL fails to lock.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
        unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
        int r;

        /* bypass vclk and dclk with bclk */
        WREG32_P(CG_UPLL_FUNC_CNTL_2,
                VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
                ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

        /* put PLL in bypass mode */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

        if (!vclk || !dclk) {
                /* keep the Bypass mode, put PLL to sleep */
                WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
                return 0;
        }

        /* find a feedback/post divider combo for the requested clocks
         * (VCO constrained to 125000-250000 * 10 KHz, fb_div max 0x03FFFFFF)
         */
        r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
                                          16384, 0x03FFFFFF, 0, 128, 5,
                                          &fb_div, &vclk_div, &dclk_div);
        if (r)
                return r;

        /* set RESET_ANTI_MUX to 0 */
        WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

        /* set VCO_MODE to 1 */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

        /* toggle UPLL_SLEEP to 1 then back to 0 */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

        /* deassert UPLL_RESET */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

        mdelay(1);

        /* handshake with the SMC before changing the dividers */
        r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
        if (r)
                return r;

        /* assert UPLL_RESET again */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

        /* disable spread spectrum. */
        WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

        /* set feedback divider */
        WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

        /* set ref divider to 0 */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

        /* select the spare bit appropriate for the VCO range */
        if (fb_div < 307200)
                WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
        else
                WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

        /* set PDIV_A and PDIV_B */
        WREG32_P(CG_UPLL_FUNC_CNTL_2,
                UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
                ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

        /* give the PLL some time to settle */
        mdelay(15);

        /* deassert PLL_RESET */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

        mdelay(15);

        /* switch from bypass mode to normal mode */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

        /* second handshake confirms the PLL locked in normal mode */
        r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
        if (r)
                return r;

        /* switch VCLK and DCLK selection */
        WREG32_P(CG_UPLL_FUNC_CNTL_2,
                VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
                ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

        mdelay(100);

        return 0;
}