/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

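/* All ucode image sizes below are in dwords; cik_init_microcode()
 * multiplies by 4 to get the expected size in bytes.
 */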
/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

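/* MODULE_FIRMWARE() only records the blob names in the module info so
 * userspace tooling (modinfo, initramfs generators) can find them; the
 * actual loading happens at init time via request_firmware().
 */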
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

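/* helpers shared with the r600/evergreen/si code */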
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
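	/* read back to post the index write before accessing the data port */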
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

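/*
 * "Golden" register tables: each entry below is an {offset, AND mask,
 * OR value} triple, applied in order by radeon_program_register_sequence().
 */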
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

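/**
 * cik_init_golden_registers - program the golden register fixups
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-family golden register settings at init
 * time (CIK).
 */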
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

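/*
 * The doorbell aperture is a mapped PCI region used to notify the GPU
 * that new work is available on a ring (e.g. compute queues); offsets
 * are in bytes from the start of the aperture.
 */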
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

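/*
 * MC io register fixups: pairs of {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value} written out before the MC ucode itself
 * is loaded in ci_mc_load_microcode().
 */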
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
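/*
 * Typical usage (sketch): callers bracket accesses to instanced
 * registers with a select and a reset back to instance 0, e.g.:
 *
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program the instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */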

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

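	/* only load the MC ucode if the MC sequencer is not already running */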
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
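/*
 * The values below land in GB_TILE_MODE0..31 (and GB_MACROTILE_MODE0..15
 * for the bank parameters); the tile mode table is also cached in
 * rdev->config.cik so userspace can query it and pick matching indices.
 */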
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
1223                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1224                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1225                                         break;
1226                                 case 14:
1227                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231                                         break;
1232                                 case 16:
1233                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1234                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1235                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1236                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1237                                         break;
1238                                 case 17:
1239                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1240                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1241                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1242                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1243                                         break;
1244                                 case 27:
1245                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1246                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1247                                         break;
1248                                 case 28:
1249                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253                                         break;
1254                                 case 29:
1255                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1256                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1257                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1258                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1259                                         break;
1260                                 case 30:
1261                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1262                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1263                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1264                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265                                         break;
1266                                 default:
1267                                         gb_tile_moden = 0;
1268                                         break;
1269                                 }
1270                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1271                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1272                         }
1273                 } else if (num_rbs < 4) {
1274                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1275                                 switch (reg_offset) {
1276                                 case 0:
1277                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1281                                         break;
1282                                 case 1:
1283                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1287                                         break;
1288                                 case 2:
1289                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1293                                         break;
1294                                 case 3:
1295                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1296                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1297                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1298                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1299                                         break;
1300                                 case 4:
1301                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304                                                          TILE_SPLIT(split_equal_to_row_size));
1305                                         break;
1306                                 case 5:
1307                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1308                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1309                                         break;
1310                                 case 6:
1311                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1312                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1313                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1314                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1315                                         break;
1316                                 case 7:
1317                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1318                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1319                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320                                                          TILE_SPLIT(split_equal_to_row_size));
1321                                         break;
1322                                 case 8:
1323                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1324                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
1325                                         break;
1326                                 case 9:
1327                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1328                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1329                                         break;
1330                                 case 10:
1331                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1332                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1335                                         break;
1336                                 case 11:
1337                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1338                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1339                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1340                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1341                                         break;
1342                                 case 12:
1343                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1344                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1346                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1347                                         break;
1348                                 case 13:
1349                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1350                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1351                                         break;
1352                                 case 14:
1353                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1354                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357                                         break;
1358                                 case 16:
1359                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1360                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1361                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1362                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1363                                         break;
1364                                 case 17:
1365                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1366                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1367                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1368                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1369                                         break;
1370                                 case 27:
1371                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1372                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1373                                         break;
1374                                 case 28:
1375                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1379                                         break;
1380                                 case 29:
1381                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1382                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1383                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1384                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385                                         break;
1386                                 case 30:
1387                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1388                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1389                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1390                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391                                         break;
1392                                 default:
1393                                         gb_tile_moden = 0;
1394                                         break;
1395                                 }
1396                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1397                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1398                         }
1399                 }
1400                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401                         switch (reg_offset) {
1402                         case 0:
1403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1407                                 break;
1408                         case 1:
1409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1413                                 break;
1414                         case 2:
1415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1419                                 break;
1420                         case 3:
1421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1425                                 break;
1426                         case 4:
1427                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1431                                 break;
1432                         case 5:
1433                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1436                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1437                                 break;
1438                         case 6:
1439                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1443                                 break;
1444                         case 8:
1445                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1446                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1449                                 break;
1450                         case 9:
1451                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1452                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1455                                 break;
1456                         case 10:
1457                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1460                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1461                                 break;
1462                         case 11:
1463                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1465                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1466                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1467                                 break;
1468                         case 12:
1469                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1472                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1473                                 break;
1474                         case 13:
1475                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1478                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1479                                 break;
1480                         case 14:
1481                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1485                                 break;
1486                         default:
1487                                 gb_tile_moden = 0;
1488                                 break;
1489                         }
1490                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1491                 }
1492         } else if (num_pipe_configs == 2) {
1493                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1494                         switch (reg_offset) {
1495                         case 0:
1496                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1499                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1500                                 break;
1501                         case 1:
1502                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1505                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1506                                 break;
1507                         case 2:
1508                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1511                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1512                                 break;
1513                         case 3:
1514                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1516                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1517                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1518                                 break;
1519                         case 4:
1520                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1523                                                  TILE_SPLIT(split_equal_to_row_size));
1524                                 break;
1525                         case 5:
1526                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1527                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1528                                 break;
1529                         case 6:
1530                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1531                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1532                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1533                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1534                                 break;
1535                         case 7:
1536                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1537                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1538                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1539                                                  TILE_SPLIT(split_equal_to_row_size));
1540                                 break;
1541                         case 8:
1542                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1543                                 break;
1544                         case 9:
1545                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1546                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1547                                 break;
1548                         case 10:
1549                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1550                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1552                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1553                                 break;
1554                         case 11:
1555                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1556                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1558                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1559                                 break;
1560                         case 12:
1561                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1562                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1564                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1565                                 break;
1566                         case 13:
1567                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1568                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1569                                 break;
1570                         case 14:
1571                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1572                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1574                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1575                                 break;
1576                         case 16:
1577                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1578                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1579                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1580                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1581                                 break;
1582                         case 17:
1583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1584                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1585                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1586                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1587                                 break;
1588                         case 27:
1589                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1590                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1591                                 break;
1592                         case 28:
1593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1596                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1597                                 break;
1598                         case 29:
1599                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1600                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1601                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1602                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1603                                 break;
1604                         case 30:
1605                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1606                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1607                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1608                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609                                 break;
1610                         default:
1611                                 gb_tile_moden = 0;
1612                                 break;
1613                         }
1614                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1615                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1616                 }
1617                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1618                         switch (reg_offset) {
1619                         case 0:
1620                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1621                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1622                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1624                                 break;
1625                         case 1:
1626                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1627                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1628                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1630                                 break;
1631                         case 2:
1632                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1634                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1636                                 break;
1637                         case 3:
1638                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1642                                 break;
1643                         case 4:
1644                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1648                                 break;
1649                         case 5:
1650                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1651                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1652                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1654                                 break;
1655                         case 6:
1656                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1658                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1659                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1660                                 break;
1661                         case 8:
1662                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1663                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1664                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1666                                 break;
1667                         case 9:
1668                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1669                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1670                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1672                                 break;
1673                         case 10:
1674                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1675                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1676                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1678                                 break;
1679                         case 11:
1680                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1681                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1682                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1684                                 break;
1685                         case 12:
1686                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1688                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1690                                 break;
1691                         case 13:
1692                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1694                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1696                                 break;
1697                         case 14:
1698                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1699                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1700                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1701                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1702                                 break;
1703                         default:
1704                                 gb_tile_moden = 0;
1705                                 break;
1706                         }
1707                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1708                 }
1709         } else
1710                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1711 }
1712
1713 /**
1714  * cik_select_se_sh - select which SE, SH to address
1715  *
1716  * @rdev: radeon_device pointer
1717  * @se_num: shader engine to address
1718  * @sh_num: sh block to address
1719  *
1720  * Select which SE, SH combinations to address. Certain
1721  * registers are instanced per SE or SH.  0xffffffff means
1722  * broadcast to all SEs or SHs (CIK).
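 * E.g. cik_select_se_sh(rdev, 0xffffffff, 0xffffffff) restores broadcast
 * mode after a sequence of per-instance register accesses.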
1723  */
1724 static void cik_select_se_sh(struct radeon_device *rdev,
1725                              u32 se_num, u32 sh_num)
1726 {
1727         u32 data = INSTANCE_BROADCAST_WRITES;
1728
1729         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1730                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1731         else if (se_num == 0xffffffff)
1732                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1733         else if (sh_num == 0xffffffff)
1734                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1735         else
1736                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1737         WREG32(GRBM_GFX_INDEX, data);
1738 }
1739
1740 /**
1741  * cik_create_bitmask - create a bitmask
1742  *
1743  * @bit_width: length of the mask
1744  *
1745  * Create a variable length bit mask (CIK).
1746  * Returns the bitmask.
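 * E.g. bit_width = 4 returns 0xf (the four low bits set).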
1747  */
1748 static u32 cik_create_bitmask(u32 bit_width)
1749 {
1750         u32 i, mask = 0;
1751
1752         for (i = 0; i < bit_width; i++) {
1753                 mask <<= 1;
1754                 mask |= 1;
1755         }
1756         return mask;
1757 }
1758
1759 /**
1760  * cik_get_rb_disabled - compute the disabled render backend (RB) bitmask
1761  *
1762  * @rdev: radeon_device pointer
1763  * @max_rb_num: max RBs (render backends) for the asic
1764  * @se_num: number of SEs (shader engines) for the asic
1765  * @sh_per_se: number of SH blocks per SE for the asic
1766  *
1767  * Calculates the bitmask of disabled RBs (CIK).
1768  * Returns the disabled RB bitmask.
1769  */
1770 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1771                               u32 max_rb_num, u32 se_num,
1772                               u32 sh_per_se)
1773 {
1774         u32 data, mask;
1775
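        /* combine the RBs fused off in hardware (CC_*) with any the
         * driver has disabled (GC_USER_*) */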
1776         data = RREG32(CC_RB_BACKEND_DISABLE);
1777         if (data & 1)
1778                 data &= BACKEND_DISABLE_MASK;
1779         else
1780                 data = 0;
1781         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1782
1783         data >>= BACKEND_DISABLE_SHIFT;
1784
1785         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786
1787         return data & mask;
1788 }
1789
1790 /**
1791  * cik_setup_rb - setup the RBs on the asic
1792  *
1793  * @rdev: radeon_device pointer
1794  * @se_num: number of SEs (shader engines) for the asic
1795  * @sh_per_se: number of SH blocks per SE for the asic
1796  * @max_rb_num: max RBs (render backends) for the asic
1797  *
1798  * Configures per-SE/SH RB registers (CIK).
1799  */
1800 static void cik_setup_rb(struct radeon_device *rdev,
1801                          u32 se_num, u32 sh_per_se,
1802                          u32 max_rb_num)
1803 {
1804         int i, j;
1805         u32 data, mask;
1806         u32 disabled_rbs = 0;
1807         u32 enabled_rbs = 0;
1808
1809         for (i = 0; i < se_num; i++) {
1810                 for (j = 0; j < sh_per_se; j++) {
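                        /* pack each SH's RB-disable bits into one global bitmap */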
1811                         cik_select_se_sh(rdev, i, j);
1812                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1813                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1814                 }
1815         }
1816         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1817
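        /* every RB not flagged as disabled above is available for use */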
1818         mask = 1;
1819         for (i = 0; i < max_rb_num; i++) {
1820                 if (!(disabled_rbs & mask))
1821                         enabled_rbs |= mask;
1822                 mask <<= 1;
1823         }
1824
1825         for (i = 0; i < se_num; i++) {
1826                 cik_select_se_sh(rdev, i, 0xffffffff);
1827                 data = 0;
1828                 for (j = 0; j < sh_per_se; j++) {
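                        /* two bits of enabled_rbs are consumed per SH; pick a
                         * RASTER_CONFIG RB mapping that matches the surviving RBs */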
1829                         switch (enabled_rbs & 3) {
1830                         case 1:
1831                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1832                                 break;
1833                         case 2:
1834                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1835                                 break;
1836                         case 3:
1837                         default:
1838                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1839                                 break;
1840                         }
1841                         enabled_rbs >>= 2;
1842                 }
1843                 WREG32(PA_SC_RASTER_CONFIG, data);
1844         }
1845         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1846 }
1847
1848 /**
1849  * cik_gpu_init - setup the 3D engine
1850  *
1851  * @rdev: radeon_device pointer
1852  *
1853  * Configures the 3D engine and tiling configuration
1854  * registers so that the 3D engine is usable.
1855  */
1856 static void cik_gpu_init(struct radeon_device *rdev)
1857 {
1858         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1859         u32 mc_shared_chmap, mc_arb_ramcfg;
1860         u32 hdp_host_path_cntl;
1861         u32 tmp;
1862         int i, j;
1863
1864         switch (rdev->family) {
1865         case CHIP_BONAIRE:
1866                 rdev->config.cik.max_shader_engines = 2;
1867                 rdev->config.cik.max_tile_pipes = 4;
1868                 rdev->config.cik.max_cu_per_sh = 7;
1869                 rdev->config.cik.max_sh_per_se = 1;
1870                 rdev->config.cik.max_backends_per_se = 2;
1871                 rdev->config.cik.max_texture_channel_caches = 4;
1872                 rdev->config.cik.max_gprs = 256;
1873                 rdev->config.cik.max_gs_threads = 32;
1874                 rdev->config.cik.max_hw_contexts = 8;
1875
1876                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1877                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1878                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1879                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1880                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1881                 break;
1882         case CHIP_KAVERI:
1883                 /* TODO */
1884                 break;
1885         case CHIP_KABINI:
1886         default:
1887                 rdev->config.cik.max_shader_engines = 1;
1888                 rdev->config.cik.max_tile_pipes = 2;
1889                 rdev->config.cik.max_cu_per_sh = 2;
1890                 rdev->config.cik.max_sh_per_se = 1;
1891                 rdev->config.cik.max_backends_per_se = 1;
1892                 rdev->config.cik.max_texture_channel_caches = 2;
1893                 rdev->config.cik.max_gprs = 256;
1894                 rdev->config.cik.max_gs_threads = 16;
1895                 rdev->config.cik.max_hw_contexts = 8;
1896
1897                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1898                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1899                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1900                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1901                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1902                 break;
1903         }
1904
1905         /* Initialize HDP */
1906         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1907                 WREG32((0x2c14 + j), 0x00000000);
1908                 WREG32((0x2c18 + j), 0x00000000);
1909                 WREG32((0x2c1c + j), 0x00000000);
1910                 WREG32((0x2c20 + j), 0x00000000);
1911                 WREG32((0x2c24 + j), 0x00000000);
1912         }
1913
1914         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1915
1916         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1917
1918         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1919         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1920
1921         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1922         rdev->config.cik.mem_max_burst_length_bytes = 256;
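        /* row size in bytes = 4 * 2^(8 + NOOFCOLS); convert to KB, capped at 4 below */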
1923         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1924         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925         if (rdev->config.cik.mem_row_size_in_kb > 4)
1926                 rdev->config.cik.mem_row_size_in_kb = 4;
1927         /* XXX use MC settings? */
1928         rdev->config.cik.shader_engine_tile_size = 32;
1929         rdev->config.cik.num_gpus = 1;
1930         rdev->config.cik.multi_gpu_tile_size = 64;
1931
1932         /* fix up row size */
1933         gb_addr_config &= ~ROW_SIZE_MASK;
1934         switch (rdev->config.cik.mem_row_size_in_kb) {
1935         case 1:
1936         default:
1937                 gb_addr_config |= ROW_SIZE(0);
1938                 break;
1939         case 2:
1940                 gb_addr_config |= ROW_SIZE(1);
1941                 break;
1942         case 4:
1943                 gb_addr_config |= ROW_SIZE(2);
1944                 break;
1945         }
1946
1947         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1948          * not have bank info, so create a custom tiling dword.
1949          * bits 3:0   num_pipes
1950          * bits 7:4   num_banks
1951          * bits 11:8  group_size
1952          * bits 15:12 row_size
1953          */
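        /* e.g. Bonaire's 4 tile pipes are encoded as 2 in bits 3:0 below */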
1954         rdev->config.cik.tile_config = 0;
1955         switch (rdev->config.cik.num_tile_pipes) {
1956         case 1:
1957                 rdev->config.cik.tile_config |= (0 << 0);
1958                 break;
1959         case 2:
1960                 rdev->config.cik.tile_config |= (1 << 0);
1961                 break;
1962         case 4:
1963                 rdev->config.cik.tile_config |= (2 << 0);
1964                 break;
1965         case 8:
1966         default:
1967                 /* XXX what about 12? */
1968                 rdev->config.cik.tile_config |= (3 << 0);
1969                 break;
1970         }
1971         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1972                 rdev->config.cik.tile_config |= 1 << 4;
1973         else
1974                 rdev->config.cik.tile_config |= 0 << 4;
1975         rdev->config.cik.tile_config |=
1976                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1977         rdev->config.cik.tile_config |=
1978                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1979
1980         WREG32(GB_ADDR_CONFIG, gb_addr_config);
1981         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1982         WREG32(DMIF_ADDR_CALC, gb_addr_config);
1983         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1984         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1985         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1986         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1987         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1988
1989         cik_tiling_mode_table_init(rdev);
1990
1991         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1992                      rdev->config.cik.max_sh_per_se,
1993                      rdev->config.cik.max_backends_per_se);
1994
1995         /* set HW defaults for 3D engine */
1996         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1997
1998         WREG32(SX_DEBUG_1, 0x20);
1999
2000         WREG32(TA_CNTL_AUX, 0x00010000);
2001
2002         tmp = RREG32(SPI_CONFIG_CNTL);
2003         tmp |= 0x03000000;
2004         WREG32(SPI_CONFIG_CNTL, tmp);
2005
2006         WREG32(SQ_CONFIG, 1);
2007
2008         WREG32(DB_DEBUG, 0);
2009
2010         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2011         tmp |= 0x00000400;
2012         WREG32(DB_DEBUG2, tmp);
2013
2014         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2015         tmp |= 0x00020200;
2016         WREG32(DB_DEBUG3, tmp);
2017
2018         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2019         tmp |= 0x00018208;
2020         WREG32(CB_HW_CONTROL, tmp);
2021
2022         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2023
2024         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2025                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2026                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2027                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2028
2029         WREG32(VGT_NUM_INSTANCES, 1);
2030
2031         WREG32(CP_PERFMON_CNTL, 0);
2032
2033         WREG32(SQ_CONFIG, 0);
2034
2035         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036                                           FORCE_EOV_MAX_REZ_CNT(255)));
2037
2038         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2039                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2040
2041         WREG32(VGT_GS_VERTEX_REUSE, 16);
2042         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2043
2044         tmp = RREG32(HDP_MISC_CNTL);
2045         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2046         WREG32(HDP_MISC_CNTL, tmp);
2047
2048         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2049         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2050
2051         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2052         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053
2054         udelay(50);
2055 }
2056
2057 /*
2058  * GPU scratch register helper functions.
2059  */
2060 /**
2061  * cik_scratch_init - setup driver info for CP scratch regs
2062  *
2063  * @rdev: radeon_device pointer
2064  *
2065  * Set up the number and offset of the CP scratch registers.
2066  * NOTE: use of CP scratch registers is a legacy interface and
2067  * is not used by default on newer asics (r6xx+).  On newer asics,
2068  * memory buffers are used for fences rather than scratch regs.
2069  */
2070 static void cik_scratch_init(struct radeon_device *rdev)
2071 {
2072         int i;
2073
2074         rdev->scratch.num_reg = 7;
2075         rdev->scratch.reg_base = SCRATCH_REG0;
2076         for (i = 0; i < rdev->scratch.num_reg; i++) {
2077                 rdev->scratch.free[i] = true;
2078                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2079         }
2080 }
2081
2082 /**
2083  * cik_ring_test - basic gfx ring test
2084  *
2085  * @rdev: radeon_device pointer
2086  * @ring: radeon_ring structure holding ring information
2087  *
2088  * Allocate a scratch register and write to it using the gfx ring (CIK).
2089  * Provides a basic gfx ring test to verify that the ring is working.
2090  * Used by cik_cp_gfx_resume().
2091  * Returns 0 on success, error on failure.
2092  */
2093 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2094 {
2095         uint32_t scratch;
2096         uint32_t tmp = 0;
2097         unsigned i;
2098         int r;
2099
2100         r = radeon_scratch_get(rdev, &scratch);
2101         if (r) {
2102                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2103                 return r;
2104         }
2105         WREG32(scratch, 0xCAFEDEAD);
2106         r = radeon_ring_lock(rdev, ring, 3);
2107         if (r) {
2108                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2109                 radeon_scratch_free(rdev, scratch);
2110                 return r;
2111         }
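        /* write 0xDEADBEEF to the scratch reg via a SET_UCONFIG_REG packet,
         * then poll until the write lands or the timeout expires */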
        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
        radeon_ring_write(ring, 0xDEADBEEF);
        radeon_ring_unlock_commit(rdev, ring);

        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < rdev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
        } else {
                DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        radeon_scratch_free(rdev, scratch);
        return r;
}

/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
                             struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        radeon_ring_write(ring, addr & 0xfffffffc);
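        /* DATA_SEL(1) writes the 32-bit seq; INT_SEL(2) interrupts on write confirm */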
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
        /* HDP flush */
        /* We should be using the new WAIT_REG_MEM special op packet here
         * but it causes the CP to hang
         */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
                                 struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* RELEASE_MEM - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
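        /* unlike EVENT_WRITE_EOP, RELEASE_MEM takes the sel dword before the address */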
        radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(addr));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
        /* HDP flush */
        /* We should be using the new WAIT_REG_MEM special op packet here
         * but it causes the CP to hang
         */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
}

void cik_semaphore_ring_emit(struct radeon_device *rdev,
                             struct radeon_ring *ring,
                             struct radeon_semaphore *semaphore,
                             bool emit_wait)
{
        uint64_t addr = semaphore->gpu_addr;
        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

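        /* the wait/signal select rides in the upper dword with addr_hi */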
        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
        radeon_ring_write(ring, addr & 0xffffffff);
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        u32 header, control = INDIRECT_BUFFER_VALID;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                if (ring->rptr_save_reg) {
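                        /* 3 dwords for this uconfig write plus 4 for the IB packet itself */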
                        next_rptr = ring->wptr + 3 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        next_rptr = ring->wptr + 5 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        control |= ib->length_dw |
                (ib->vm ? (ib->vm->id << 24) : 0);

        radeon_ring_write(ring, header);
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        radeon_ring_write(ring, control);
}

/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
        struct radeon_ib ib;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = radeon_scratch_get(rdev, &scratch);
        if (r) {
                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
        if (r) {
                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
                radeon_scratch_free(rdev, scratch);
                return r;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;
        r = radeon_ib_schedule(rdev, &ib, NULL);
        if (r) {
                radeon_scratch_free(rdev, scratch);
                radeon_ib_free(rdev, &ib);
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
                return r;
        }
        r = radeon_fence_wait(ib.fence, false);
        if (r) {
                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                radeon_scratch_free(rdev, scratch);
                radeon_ib_free(rdev, &ib);
                return r;
        }
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < rdev->usec_timeout) {
                DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
        } else {
                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
        radeon_scratch_free(rdev, scratch);
        radeon_ib_free(rdev, &ib);
        return r;
}

/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
        if (enable) {
                WREG32(CP_ME_CNTL, 0);
        } else {
                WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
        }
        udelay(50);
}

/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        int i;

        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
                return -EINVAL;

        cik_cp_gfx_enable(rdev, false);

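        /* each UCODE_DATA write is assumed to advance the address register,
         * so it is zeroed before and after each upload
         */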
        /* PFP */
        fw_data = (const __be32 *)rdev->pfp_fw->data;
        WREG32(CP_PFP_UCODE_ADDR, 0);
        for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
                WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
        WREG32(CP_PFP_UCODE_ADDR, 0);

        /* CE */
        fw_data = (const __be32 *)rdev->ce_fw->data;
        WREG32(CP_CE_UCODE_ADDR, 0);
        for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
                WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
        WREG32(CP_CE_UCODE_ADDR, 0);

        /* ME */
        fw_data = (const __be32 *)rdev->me_fw->data;
        WREG32(CP_ME_RAM_WADDR, 0);
        for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
                WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
        WREG32(CP_ME_RAM_WADDR, 0);

        WREG32(CP_PFP_UCODE_ADDR, 0);
        WREG32(CP_CE_UCODE_ADDR, 0);
        WREG32(CP_ME_RAM_WADDR, 0);
        WREG32(CP_ME_RAM_RADDR, 0);
        return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        /* init the CP */
        WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
        WREG32(CP_ENDIAN_SWAP, 0);
        WREG32(CP_DEVICE_ID, 1);

        cik_cp_gfx_enable(rdev, true);

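        /* the 17 extra dwords cover the fixed init packets emitted
         * around the clear state below
         */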
        r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* init the CE partitions.  CE only used for gfx on CIK */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        radeon_ring_write(ring, 0xc000);
        radeon_ring_write(ring, 0xc000);

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        radeon_ring_write(ring, 0x80000000);
        radeon_ring_write(ring, 0x80000000);

        for (i = 0; i < cik_default_size; i++)
                radeon_ring_write(ring, cik_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

        radeon_ring_unlock_commit(rdev, ring);

        return 0;
}

/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_enable(rdev, false);
        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr;
        int r;

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(CP_RB_VMID, 0);

        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
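        /* the RB size field is log2 of the ring size in qwords */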
        rb_bufsz = drm_order(ring->ring_size / 8);
        tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB0_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

        /* scratch register shadowing is no longer supported */
        WREG32(SCRATCH_UMSK, 0);

        if (!rdev->wb.enabled)
                tmp |= RB_NO_UPDATE;

        mdelay(1);
        WREG32(CP_RB0_CNTL, tmp);

        rb_addr = ring->gpu_addr >> 8;
        WREG32(CP_RB0_BASE, rb_addr);
        WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

        ring->rptr = RREG32(CP_RB0_RPTR);

        /* start the ring */
        cik_cp_gfx_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                return r;
        }
        return 0;
}

u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
                              struct radeon_ring *ring)
{
        u32 rptr;

        if (rdev->wb.enabled) {
                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
        } else {
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
                rptr = RREG32(CP_HQD_PQ_RPTR);
                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);
        }
        rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

        return rptr;
}

u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
                              struct radeon_ring *ring)
{
        u32 wptr;

        if (rdev->wb.enabled) {
                wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
        } else {
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
                wptr = RREG32(CP_HQD_PQ_WPTR);
                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);
        }
        wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

        return wptr;
}

void cik_compute_ring_set_wptr(struct radeon_device *rdev,
                               struct radeon_ring *ring)
{
        u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;

        rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
        WDOORBELL32(ring->doorbell_offset, wptr);
}

/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
        if (enable)
                WREG32(CP_MEC_CNTL, 0);
        else
                WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
        udelay(50);
}

/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1 ucode (and the MEC2 ucode on KAVERI).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        int i;

        if (!rdev->mec_fw)
                return -EINVAL;

        cik_cp_compute_enable(rdev, false);

        /* MEC1 */
        fw_data = (const __be32 *)rdev->mec_fw->data;
        WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
        for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
        WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

        if (rdev->family == CHIP_KAVERI) {
                /* MEC2 */
                fw_data = (const __be32 *)rdev->mec_fw->data;
                WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                        WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
                WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
        }

        return 0;
}

/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
        cik_cp_compute_enable(rdev, true);

        return 0;
}

/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
        int i, idx, r;

        cik_cp_compute_enable(rdev, false);

        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                if (rdev->ring[idx].mqd_obj) {
                        r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                        if (unlikely(r != 0))
                                dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

                        radeon_bo_unpin(rdev->ring[idx].mqd_obj);
                        radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                        radeon_bo_unref(&rdev->ring[idx].mqd_obj);
                        rdev->ring[idx].mqd_obj = NULL;
                }
        }
}

static void cik_mec_fini(struct radeon_device *rdev)
{
        int r;

        if (rdev->mec.hpd_eop_obj) {
                r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
                if (unlikely(r != 0))
                        dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
                radeon_bo_unpin(rdev->mec.hpd_eop_obj);
                radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

                radeon_bo_unref(&rdev->mec.hpd_eop_obj);
                rdev->mec.hpd_eop_obj = NULL;
        }
}

#define MEC_HPD_SIZE 2048

static int cik_mec_init(struct radeon_device *rdev)
{
        int r;
        u32 *hpd;

        /*
         * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
         * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
         */
        if (rdev->family == CHIP_KAVERI)
                rdev->mec.num_mec = 2;
        else
                rdev->mec.num_mec = 1;
        rdev->mec.num_pipe = 4;
        rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

        if (rdev->mec.hpd_eop_obj == NULL) {
                r = radeon_bo_create(rdev,
                                     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
                                     PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_GTT, NULL,
                                     &rdev->mec.hpd_eop_obj);
                if (r) {
                        dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
                        return r;
                }
        }

        r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
        if (unlikely(r != 0)) {
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
                          &rdev->mec.hpd_eop_gpu_addr);
        if (r) {
                dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
        if (r) {
                dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }

        /* clear memory.  Not sure if this is required or not */
        memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

        radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
        radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

        return 0;
}

struct hqd_registers {
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};

struct bonaire_mqd {
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};

/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
        int r, i, j, idx;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;

        r = cik_cp_compute_start(rdev);
        if (r)
                return r;

        /* fix up chicken bits */
        tmp = RREG32(CP_CPF_DEBUG);
        tmp |= (1 << 23);
        WREG32(CP_CPF_DEBUG, tmp);

        /* init the pipes */
        mutex_lock(&rdev->srbm_mutex);
        for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
                int me = (i < 4) ? 1 : 2;
                int pipe = (i < 4) ? i : (i - 4);
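                /* pipes 0-3 live on MEC1 (me 1), pipes 4-7 on MEC2 (me 2) */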

                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

                cik_srbm_select(rdev, me, pipe, 0, 0);

                /* write the EOP addr */
                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

                /* set the VMID assigned */
                WREG32(CP_HPD_EOP_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
                tmp |= drm_order(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);
        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);

        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                if (rdev->ring[idx].mqd_obj == NULL) {
                        r = radeon_bo_create(rdev,
                                             sizeof(struct bonaire_mqd),
                                             PAGE_SIZE, true,
                                             RADEON_GEM_DOMAIN_GTT, NULL,
                                             &rdev->ring[idx].mqd_obj);
                        if (r) {
                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                if (unlikely(r != 0)) {
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }

                /* doorbell offset */
                rdev->ring[idx].doorbell_offset =
                        (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct bonaire_mqd));

                mqd = (struct bonaire_mqd *)buf;
                mqd->header = 0xC0310800;
                mqd->static_thread_mgmt01[0] = 0xffffffff;
                mqd->static_thread_mgmt01[1] = 0xffffffff;
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;

                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);

                /* disable wptr polling */
                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
                tmp &= ~WPTR_POLL_EN;
                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

                /* enable doorbell? */
                mqd->queue_state.cp_hqd_pq_doorbell_control =
                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell)
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                else
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* disable the queue if it's active */
                mqd->queue_state.cp_hqd_dequeue_request = 0;
                mqd->queue_state.cp_hqd_pq_rptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = 0;
                if (RREG32(CP_HQD_ACTIVE) & 1) {
                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                        /* use j here so we don't clobber the queue loop counter */
                        for (j = 0; j < rdev->usec_timeout; j++) {
                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
                /* set MQD vmid to 0 */
                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

                /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

                mqd->queue_state.cp_hqd_pq_control |=
                        drm_order(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
                        (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
                mqd->queue_state.cp_hqd_pq_control |=
                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

                /* set the wb address whether it's enabled or not */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        mqd->queue_state.cp_hqd_pq_doorbell_control =
                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
                                ~(DOORBELL_SOURCE | DOORBELL_HIT);
                } else {
                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                rdev->ring[idx].wptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
                mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;

                /* set the vmid for the queue */
                mqd->queue_state.cp_hqd_vmid = 0;
                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

                /* activate the queue */
                mqd->queue_state.cp_hqd_active = 1;
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);

                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                rdev->ring[idx].ready = true;
                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
                if (r)
                        rdev->ring[idx].ready = false;
        }

        return 0;
}

static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
        cik_cp_gfx_enable(rdev, enable);
        cik_cp_compute_enable(rdev, enable);
}

static int cik_cp_load_microcode(struct radeon_device *rdev)
{
        int r;

        r = cik_cp_gfx_load_microcode(rdev);
        if (r)
                return r;
        r = cik_cp_compute_load_microcode(rdev);
        if (r)
                return r;

        return 0;
}

static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}

static int cik_cp_resume(struct radeon_device *rdev)
{
        int r;

        /* Reset all cp blocks */
        WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
        RREG32(GRBM_SOFT_RESET);
        mdelay(15);
        WREG32(GRBM_SOFT_RESET, 0);
        RREG32(GRBM_SOFT_RESET);

        r = cik_cp_load_microcode(rdev);
        if (r)
                return r;

        r = cik_cp_gfx_resume(rdev);
        if (r)
                return r;
        r = cik_cp_compute_resume(rdev);
        if (r)
                return r;

        return 0;
}

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
                              struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

        if (rdev->wb.enabled) {
                u32 next_rptr = ring->wptr + 5;
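                /* align next_rptr with the (8n+4)-dword start of the IB
                 * packet below, then add the packet's own 4 dwords
                 */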
                while ((next_rptr & 7) != 4)
                        next_rptr++;
                next_rptr += 4;
                radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
                radeon_ring_write(ring, 1); /* number of DWs to follow */
                radeon_ring_write(ring, next_rptr);
        }


        /* IB packet must end on an 8 DW boundary */
        while ((ring->wptr & 7) != 4)
                radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
        radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
        radeon_ring_write(ring, ib->length_dw);
}

/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and a DMA trap packet to generate
 * an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
                              struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
        u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
                          SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
        u32 ref_and_mask;

        if (fence->ring == R600_RING_TYPE_DMA_INDEX)
                ref_and_mask = SDMA0;
        else
                ref_and_mask = SDMA1;
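        /* each engine polls its own done bit in GPU_HDP_FLUSH_DONE */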

        /* write the fence */
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
        radeon_ring_write(ring, addr & 0xffffffff);
        radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        radeon_ring_write(ring, fence->seq);
        /* generate an interrupt */
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
        /* flush HDP */
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
        radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
        radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
        radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
        radeon_ring_write(ring, ref_and_mask); /* MASK */
        radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
                                  struct radeon_ring *ring,
                                  struct radeon_semaphore *semaphore,
                                  bool emit_wait)
{
        u64 addr = semaphore->gpu_addr;
        u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
        radeon_ring_write(ring, addr & 0xfffffff8);
        radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
        u32 rb_cntl, reg_offset;
        int i;

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

        for (i = 0; i < 2; i++) {
                if (i == 0)
                        reg_offset = SDMA0_REGISTER_OFFSET;
                else
                        reg_offset = SDMA1_REGISTER_OFFSET;
                rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
                rb_cntl &= ~SDMA_RB_ENABLE;
                WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
                WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
        }
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
        /* XXX todo */
}

/**
 * cik_sdma_enable - enable/disable the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
        u32 me_cntl, reg_offset;
        int i;

        for (i = 0; i < 2; i++) {
                if (i == 0)
                        reg_offset = SDMA0_REGISTER_OFFSET;
                else
                        reg_offset = SDMA1_REGISTER_OFFSET;
                me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
                if (enable)
                        me_cntl &= ~SDMA_HALT;
                else
                        me_cntl |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
        }
}

/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 rb_cntl, ib_cntl;
        u32 rb_bufsz;
        u32 reg_offset, wb_offset;
        int i, r;

        for (i = 0; i < 2; i++) {
                if (i == 0) {
                        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
                        reg_offset = SDMA0_REGISTER_OFFSET;
                        wb_offset = R600_WB_DMA_RPTR_OFFSET;
                } else {
                        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
                        reg_offset = SDMA1_REGISTER_OFFSET;
                        wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
                }

                WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
                WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

                /* Set ring buffer size in dwords */
                rb_bufsz = drm_order(ring->ring_size / 4);
                rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
                rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
                WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

                /* Initialize the ring buffer's read and write pointers */
                WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
                WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

                /* set the wb address whether it's enabled or not */
                WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
                       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
                WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
                       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

                if (rdev->wb.enabled)
                        rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

                WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
                WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

                ring->wptr = 0;
                WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

                ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

                /* enable DMA RB */
                WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

                ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
                ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
                /* enable DMA IBs */
                WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

                ring->ready = true;

                r = radeon_ring_test(rdev, ring->idx, ring);
                if (r) {
                        ring->ready = false;
                        return r;
                }
        }

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}

/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
        /* XXX todo */
        return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        int i;

        if (!rdev->sdma_fw)
                return -EINVAL;

        /* stop the gfx rings and rlc compute queues */
        cik_sdma_gfx_stop(rdev);
        cik_sdma_rlc_stop(rdev);

        /* halt the MEs */
        cik_sdma_enable(rdev, false);

        /* sdma0 */
        fw_data = (const __be32 *)rdev->sdma_fw->data;
        WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
        for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
                WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
        WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

        /* sdma1 */
        fw_data = (const __be32 *)rdev->sdma_fw->data;
        WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
        for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
                WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
        WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

        WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
        WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
        return 0;
}

/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_resume(struct radeon_device *rdev)
{
        int r;

        /* Reset dma */
        WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
        RREG32(SRBM_SOFT_RESET);
        udelay(50);
        WREG32(SRBM_SOFT_RESET, 0);
        RREG32(SRBM_SOFT_RESET);

        r = cik_sdma_load_microcode(rdev);
        if (r)
                return r;

        /* unhalt the MEs */
        cik_sdma_enable(rdev, true);

        /* start the gfx rings and rlc compute queues */
        r = cik_sdma_gfx_resume(rdev);
        if (r)
                return r;
        r = cik_sdma_rlc_resume(rdev);
        if (r)
                return r;

        return 0;
}

/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
static void cik_sdma_fini(struct radeon_device *rdev)
{
        /* stop the gfx rings and rlc compute queues */
        cik_sdma_gfx_stop(rdev);
        cik_sdma_rlc_stop(rdev);
        /* halt the MEs */
        cik_sdma_enable(rdev, false);
        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
        radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
        /* XXX - compute dma queue tear down */
}

3550 /**
3551  * cik_copy_dma - copy pages using the DMA engine
3552  *
3553  * @rdev: radeon_device pointer
3554  * @src_offset: src GPU address
3555  * @dst_offset: dst GPU address
3556  * @num_gpu_pages: number of GPU pages to xfer
3557  * @fence: radeon fence object
3558  *
3559  * Copy GPU pages using the DMA engine (CIK).
3560  * Used by the radeon ttm implementation to move pages if
3561  * registered as the asic copy callback.
3562  */
3563 int cik_copy_dma(struct radeon_device *rdev,
3564                  uint64_t src_offset, uint64_t dst_offset,
3565                  unsigned num_gpu_pages,
3566                  struct radeon_fence **fence)
3567 {
3568         struct radeon_semaphore *sem = NULL;
3569         int ring_index = rdev->asic->copy.dma_ring_index;
3570         struct radeon_ring *ring = &rdev->ring[ring_index];
3571         u32 size_in_bytes, cur_size_in_bytes;
3572         int i, num_loops;
3573         int r = 0;
3574
3575         r = radeon_semaphore_create(rdev, &sem);
3576         if (r) {
3577                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3578                 return r;
3579         }
3580
3581         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3582         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3583         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3584         if (r) {
3585                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3586                 radeon_semaphore_free(rdev, &sem, NULL);
3587                 return r;
3588         }
3589
3590         if (radeon_fence_need_sync(*fence, ring->idx)) {
3591                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3592                                             ring->idx);
3593                 radeon_fence_note_sync(*fence, ring->idx);
3594         } else {
3595                 radeon_semaphore_free(rdev, &sem, NULL);
3596         }
3597
3598         for (i = 0; i < num_loops; i++) {
3599                 cur_size_in_bytes = size_in_bytes;
3600                 if (cur_size_in_bytes > 0x1fffff)
3601                         cur_size_in_bytes = 0x1fffff;
3602                 size_in_bytes -= cur_size_in_bytes;
3603                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3604                 radeon_ring_write(ring, cur_size_in_bytes);
3605                 radeon_ring_write(ring, 0); /* src/dst endian swap */
3606                 radeon_ring_write(ring, src_offset & 0xffffffff);
3607                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3608                 radeon_ring_write(ring, dst_offset & 0xffffffff);
3609                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3610                 src_offset += cur_size_in_bytes;
3611                 dst_offset += cur_size_in_bytes;
3612         }
3613
3614         r = radeon_fence_emit(rdev, fence, ring->idx);
3615         if (r) {
3616                 radeon_ring_unlock_undo(rdev, ring);
3617                 return r;
3618         }
3619
3620         radeon_ring_unlock_commit(rdev, ring);
3621         radeon_semaphore_free(rdev, &sem, *fence);
3622
3623         return r;
3624 }
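
/*
 * Editor's note: a minimal, hypothetical caller sketch (not part of
 * the original driver) showing the fence handshake cik_copy_dma()
 * expects.  src/dst are assumed to be valid GPU addresses; *fence may
 * carry a fence from a prior operation that the copy will sync to.
 */
static int cik_copy_dma_example(struct radeon_device *rdev,
                                uint64_t src, uint64_t dst,
                                unsigned num_pages)
{
        struct radeon_fence *fence = NULL;
        int r;

        r = cik_copy_dma(rdev, src, dst, num_pages, &fence);
        if (r)
                return r;
        /* block until the SDMA engine signals completion, then drop
         * our reference */
        r = radeon_fence_wait(fence, false);
        radeon_fence_unref(&fence);
        return r;
}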
3625
3626 /**
3627  * cik_sdma_ring_test - simple async dma engine test
3628  *
3629  * @rdev: radeon_device pointer
3630  * @ring: radeon_ring structure holding ring information
3631  *
3632  * Test the DMA engine by using it to write a
3633  * value to memory (CIK).
3634  * Returns 0 for success, error for failure.
3635  */
3636 int cik_sdma_ring_test(struct radeon_device *rdev,
3637                        struct radeon_ring *ring)
3638 {
3639         unsigned i;
3640         int r;
3641         void __iomem *ptr = (void __iomem *)rdev->vram_scratch.ptr;
3642         u32 tmp;
3643
3644         if (!ptr) {
3645                 DRM_ERROR("invalid vram scratch pointer\n");
3646                 return -EINVAL;
3647         }
3648
3649         tmp = 0xCAFEDEAD;
3650         writel(tmp, ptr);
3651
3652         r = radeon_ring_lock(rdev, ring, 4);
3653         if (r) {
3654                 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3655                 return r;
3656         }
3657         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3658         radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3659         radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3660         radeon_ring_write(ring, 1); /* number of DWs to follow */
3661         radeon_ring_write(ring, 0xDEADBEEF);
3662         radeon_ring_unlock_commit(rdev, ring);
3663
3664         for (i = 0; i < rdev->usec_timeout; i++) {
3665                 tmp = readl(ptr);
3666                 if (tmp == 0xDEADBEEF)
3667                         break;
3668                 DRM_UDELAY(1);
3669         }
3670
3671         if (i < rdev->usec_timeout) {
3672                 DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3673         } else {
3674                 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3675                           ring->idx, tmp);
3676                 r = -EINVAL;
3677         }
3678         return r;
3679 }
3680
3681 /**
3682  * cik_sdma_ib_test - test an IB on the DMA engine
3683  *
3684  * @rdev: radeon_device pointer
3685  * @ring: radeon_ring structure holding ring information
3686  *
3687  * Test a simple IB in the DMA ring (CIK).
3688  * Returns 0 on success, error on failure.
3689  */
3690 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3691 {
3692         struct radeon_ib ib;
3693         unsigned i;
3694         int r;
3695         void __iomem *ptr = (void __iomem *)rdev->vram_scratch.ptr;
3696         u32 tmp = 0;
3697
3698         if (!ptr) {
3699                 DRM_ERROR("invalid vram scratch pointer\n");
3700                 return -EINVAL;
3701         }
3702
3703         tmp = 0xCAFEDEAD;
3704         writel(tmp, ptr);
3705
3706         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3707         if (r) {
3708                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3709                 return r;
3710         }
3711
3712         ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3713         ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3714         ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3715         ib.ptr[3] = 1;
3716         ib.ptr[4] = 0xDEADBEEF;
3717         ib.length_dw = 5;
3718
3719         r = radeon_ib_schedule(rdev, &ib, NULL);
3720         if (r) {
3721                 radeon_ib_free(rdev, &ib);
3722                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3723                 return r;
3724         }
3725         r = radeon_fence_wait(ib.fence, false);
3726         if (r) {
3727                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                radeon_ib_free(rdev, &ib); /* don't leak the IB on the error path */
3728                 return r;
3729         }
3730         for (i = 0; i < rdev->usec_timeout; i++) {
3731                 tmp = readl(ptr);
3732                 if (tmp == 0xDEADBEEF)
3733                         break;
3734                 DRM_UDELAY(1);
3735         }
3736         if (i < rdev->usec_timeout) {
3737                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3738         } else {
3739                 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3740                 r = -EINVAL;
3741         }
3742         radeon_ib_free(rdev, &ib);
3743         return r;
3744 }
3745
3747 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3748 {
3749         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3750                 RREG32(GRBM_STATUS));
3751         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3752                 RREG32(GRBM_STATUS2));
3753         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3754                 RREG32(GRBM_STATUS_SE0));
3755         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3756                 RREG32(GRBM_STATUS_SE1));
3757         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3758                 RREG32(GRBM_STATUS_SE2));
3759         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3760                 RREG32(GRBM_STATUS_SE3));
3761         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3762                 RREG32(SRBM_STATUS));
3763         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3764                 RREG32(SRBM_STATUS2));
3765         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3766                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3767         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3768                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3769         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3770         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3771                  RREG32(CP_STALLED_STAT1));
3772         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3773                  RREG32(CP_STALLED_STAT2));
3774         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3775                  RREG32(CP_STALLED_STAT3));
3776         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3777                  RREG32(CP_CPF_BUSY_STAT));
3778         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3779                  RREG32(CP_CPF_STALLED_STAT1));
3780         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3781         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3782         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3783                  RREG32(CP_CPC_STALLED_STAT1));
3784         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3785 }
3786
3787 /**
3788  * cik_gpu_check_soft_reset - check which blocks are busy
3789  *
3790  * @rdev: radeon_device pointer
3791  *
3792  * Check which blocks are busy and return the relevant reset
3793  * mask to be used by cik_gpu_soft_reset().
3794  * Returns a mask of the blocks to be reset.
3795  */
3796 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3797 {
3798         u32 reset_mask = 0;
3799         u32 tmp;
3800
3801         /* GRBM_STATUS */
3802         tmp = RREG32(GRBM_STATUS);
3803         if (tmp & (PA_BUSY | SC_BUSY |
3804                    BCI_BUSY | SX_BUSY |
3805                    TA_BUSY | VGT_BUSY |
3806                    DB_BUSY | CB_BUSY |
3807                    GDS_BUSY | SPI_BUSY |
3808                    IA_BUSY | IA_BUSY_NO_DMA))
3809                 reset_mask |= RADEON_RESET_GFX;
3810
3811         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3812                 reset_mask |= RADEON_RESET_CP;
3813
3814         /* GRBM_STATUS2 */
3815         tmp = RREG32(GRBM_STATUS2);
3816         if (tmp & RLC_BUSY)
3817                 reset_mask |= RADEON_RESET_RLC;
3818
3819         /* SDMA0_STATUS_REG */
3820         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3821         if (!(tmp & SDMA_IDLE))
3822                 reset_mask |= RADEON_RESET_DMA;
3823
3824         /* SDMA1_STATUS_REG */
3825         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3826         if (!(tmp & SDMA_IDLE))
3827                 reset_mask |= RADEON_RESET_DMA1;
3828
3829         /* SRBM_STATUS2 */
3830         tmp = RREG32(SRBM_STATUS2);
3831         if (tmp & SDMA_BUSY)
3832                 reset_mask |= RADEON_RESET_DMA;
3833
3834         if (tmp & SDMA1_BUSY)
3835                 reset_mask |= RADEON_RESET_DMA1;
3836
3837         /* SRBM_STATUS */
3838         tmp = RREG32(SRBM_STATUS);
3839
3840         if (tmp & IH_BUSY)
3841                 reset_mask |= RADEON_RESET_IH;
3842
3843         if (tmp & SEM_BUSY)
3844                 reset_mask |= RADEON_RESET_SEM;
3845
3846         if (tmp & GRBM_RQ_PENDING)
3847                 reset_mask |= RADEON_RESET_GRBM;
3848
3849         if (tmp & VMC_BUSY)
3850                 reset_mask |= RADEON_RESET_VMC;
3851
3852         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3853                    MCC_BUSY | MCD_BUSY))
3854                 reset_mask |= RADEON_RESET_MC;
3855
3856         if (evergreen_is_display_hung(rdev))
3857                 reset_mask |= RADEON_RESET_DISPLAY;
3858
3859         /* Skip MC reset as it's most likely not hung, just busy */
3860         if (reset_mask & RADEON_RESET_MC) {
3861                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3862                 reset_mask &= ~RADEON_RESET_MC;
3863         }
3864
3865         return reset_mask;
3866 }
3867
3868 /**
3869  * cik_gpu_soft_reset - soft reset GPU
3870  *
3871  * @rdev: radeon_device pointer
3872  * @reset_mask: mask of which blocks to reset
3873  *
3874  * Soft reset the blocks specified in @reset_mask.
3875  */
3876 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3877 {
3878         struct evergreen_mc_save save;
3879         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3880         u32 tmp;
3881
3882         if (reset_mask == 0)
3883                 return;
3884
3885         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3886
3887         cik_print_gpu_status_regs(rdev);
3888         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3889                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3890         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3891                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3892
3893         /* stop the rlc */
3894         cik_rlc_stop(rdev);
3895
3896         /* Disable GFX parsing/prefetching */
3897         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3898
3899         /* Disable MEC parsing/prefetching */
3900         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3901
3902         if (reset_mask & RADEON_RESET_DMA) {
3903                 /* sdma0 */
3904                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3905                 tmp |= SDMA_HALT;
3906                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3907         }
3908         if (reset_mask & RADEON_RESET_DMA1) {
3909                 /* sdma1 */
3910                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3911                 tmp |= SDMA_HALT;
3912                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3913         }
3914
3915         evergreen_mc_stop(rdev, &save);
3916         if (evergreen_mc_wait_for_idle(rdev)) {
3917                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3918         }
3919
3920         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3921                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3922
3923         if (reset_mask & RADEON_RESET_CP) {
3924                 grbm_soft_reset |= SOFT_RESET_CP;
3925
3926                 srbm_soft_reset |= SOFT_RESET_GRBM;
3927         }
3928
3929         if (reset_mask & RADEON_RESET_DMA)
3930                 srbm_soft_reset |= SOFT_RESET_SDMA;
3931
3932         if (reset_mask & RADEON_RESET_DMA1)
3933                 srbm_soft_reset |= SOFT_RESET_SDMA1;
3934
3935         if (reset_mask & RADEON_RESET_DISPLAY)
3936                 srbm_soft_reset |= SOFT_RESET_DC;
3937
3938         if (reset_mask & RADEON_RESET_RLC)
3939                 grbm_soft_reset |= SOFT_RESET_RLC;
3940
3941         if (reset_mask & RADEON_RESET_SEM)
3942                 srbm_soft_reset |= SOFT_RESET_SEM;
3943
3944         if (reset_mask & RADEON_RESET_IH)
3945                 srbm_soft_reset |= SOFT_RESET_IH;
3946
3947         if (reset_mask & RADEON_RESET_GRBM)
3948                 srbm_soft_reset |= SOFT_RESET_GRBM;
3949
3950         if (reset_mask & RADEON_RESET_VMC)
3951                 srbm_soft_reset |= SOFT_RESET_VMC;
3952
3953         if (!(rdev->flags & RADEON_IS_IGP)) {
3954                 if (reset_mask & RADEON_RESET_MC)
3955                         srbm_soft_reset |= SOFT_RESET_MC;
3956         }
3957
3958         if (grbm_soft_reset) {
3959                 tmp = RREG32(GRBM_SOFT_RESET);
3960                 tmp |= grbm_soft_reset;
3961                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3962                 WREG32(GRBM_SOFT_RESET, tmp);
3963                 tmp = RREG32(GRBM_SOFT_RESET);
3964
3965                 udelay(50);
3966
3967                 tmp &= ~grbm_soft_reset;
3968                 WREG32(GRBM_SOFT_RESET, tmp);
3969                 tmp = RREG32(GRBM_SOFT_RESET);
3970         }
3971
3972         if (srbm_soft_reset) {
3973                 tmp = RREG32(SRBM_SOFT_RESET);
3974                 tmp |= srbm_soft_reset;
3975                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3976                 WREG32(SRBM_SOFT_RESET, tmp);
3977                 tmp = RREG32(SRBM_SOFT_RESET);
3978
3979                 udelay(50);
3980
3981                 tmp &= ~srbm_soft_reset;
3982                 WREG32(SRBM_SOFT_RESET, tmp);
3983                 tmp = RREG32(SRBM_SOFT_RESET);
3984         }
3985
3986         /* Wait a little for things to settle down */
3987         udelay(50);
3988
3989         evergreen_mc_resume(rdev, &save);
3990         udelay(50);
3991
3992         cik_print_gpu_status_regs(rdev);
3993 }
3994
3995 /**
3996  * cik_asic_reset - soft reset GPU
3997  *
3998  * @rdev: radeon_device pointer
3999  *
4000  * Look up which blocks are hung and attempt
4001  * to reset them.
4002  * Returns 0 for success.
4003  */
4004 int cik_asic_reset(struct radeon_device *rdev)
4005 {
4006         u32 reset_mask;
4007
4008         reset_mask = cik_gpu_check_soft_reset(rdev);
4009
4010         if (reset_mask)
4011                 r600_set_bios_scratch_engine_hung(rdev, true);
4012
4013         cik_gpu_soft_reset(rdev, reset_mask);
4014
4015         reset_mask = cik_gpu_check_soft_reset(rdev);
4016
4017         if (!reset_mask)
4018                 r600_set_bios_scratch_engine_hung(rdev, false);
4019
4020         return 0;
4021 }
4022
4023 /**
4024  * cik_gfx_is_lockup - check if the 3D engine is locked up
4025  *
4026  * @rdev: radeon_device pointer
4027  * @ring: radeon_ring structure holding ring information
4028  *
4029  * Check if the 3D engine is locked up (CIK).
4030  * Returns true if the engine is locked, false if not.
4031  */
4032 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4033 {
4034         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4035
4036         if (!(reset_mask & (RADEON_RESET_GFX |
4037                             RADEON_RESET_COMPUTE |
4038                             RADEON_RESET_CP))) {
4039                 radeon_ring_lockup_update(ring);
4040                 return false;
4041         }
4042         /* force CP activities */
4043         radeon_ring_force_activity(rdev, ring);
4044         return radeon_ring_test_lockup(rdev, ring);
4045 }
4046
4047 /**
4048  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4049  *
4050  * @rdev: radeon_device pointer
4051  * @ring: radeon_ring structure holding ring information
4052  *
4053  * Check if the async DMA engine is locked up (CIK).
4054  * Returns true if the engine appears to be locked up, false if not.
4055  */
4056 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4057 {
4058         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4059         u32 mask;
4060
4061         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4062                 mask = RADEON_RESET_DMA;
4063         else
4064                 mask = RADEON_RESET_DMA1;
4065
4066         if (!(reset_mask & mask)) {
4067                 radeon_ring_lockup_update(ring);
4068                 return false;
4069         }
4070         /* force ring activities */
4071         radeon_ring_force_activity(rdev, ring);
4072         return radeon_ring_test_lockup(rdev, ring);
4073 }
4074
4075 /* MC */
4076 /**
4077  * cik_mc_program - program the GPU memory controller
4078  *
4079  * @rdev: radeon_device pointer
4080  *
4081  * Set the location of vram, gart, and AGP in the GPU's
4082  * physical address space (CIK).
4083  */
4084 static void cik_mc_program(struct radeon_device *rdev)
4085 {
4086         struct evergreen_mc_save save;
4087         u32 tmp;
4088         int i, j;
4089
4090         /* Initialize HDP */
4091         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4092                 WREG32((0x2c14 + j), 0x00000000);
4093                 WREG32((0x2c18 + j), 0x00000000);
4094                 WREG32((0x2c1c + j), 0x00000000);
4095                 WREG32((0x2c20 + j), 0x00000000);
4096                 WREG32((0x2c24 + j), 0x00000000);
4097         }
4098         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4099
4100         evergreen_mc_stop(rdev, &save);
4101         if (radeon_mc_wait_for_idle(rdev)) {
4102                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4103         }
4104         /* Lockout access through VGA aperture*/
4105         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4106         /* Update configuration */
4107         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4108                rdev->mc.vram_start >> 12);
4109         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4110                rdev->mc.vram_end >> 12);
4111         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4112                rdev->vram_scratch.gpu_addr >> 12);
4113         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4114         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4115         WREG32(MC_VM_FB_LOCATION, tmp);
4116         /* XXX double check these! */
4117         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4118         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4119         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4120         WREG32(MC_VM_AGP_BASE, 0);
4121         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4122         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4123         if (radeon_mc_wait_for_idle(rdev)) {
4124                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4125         }
4126         evergreen_mc_resume(rdev, &save);
4127         /* we need to own VRAM, so turn off the VGA renderer here
4128          * to stop it overwriting our objects */
4129         rv515_vga_render_disable(rdev);
4130 }
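
/*
 * Editor's note: a worked example of the MC_VM_FB_LOCATION encoding
 * above, with illustrative values: for vram_start == 0 and
 * vram_end == 0x7FFFFFFF (a 2 GB framebuffer), the high 16 bits get
 * vram_end >> 24 = 0x7F and the low 16 bits get vram_start >> 24 = 0,
 * so the register is written as 0x007F0000.
 */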
4131
4132 /**
4133  * cik_mc_init - initialize the memory controller driver params
4134  *
4135  * @rdev: radeon_device pointer
4136  *
4137  * Look up the amount of vram, vram width, and decide how to place
4138  * vram and gart within the GPU's physical address space (CIK).
4139  * Returns 0 for success.
4140  */
4141 static int cik_mc_init(struct radeon_device *rdev)
4142 {
4143         u32 tmp;
4144         int chansize, numchan;
4145
4146         /* Get VRAM information */
4147         rdev->mc.vram_is_ddr = true;
4148         tmp = RREG32(MC_ARB_RAMCFG);
4149         if (tmp & CHANSIZE_MASK) {
4150                 chansize = 64;
4151         } else {
4152                 chansize = 32;
4153         }
4154         tmp = RREG32(MC_SHARED_CHMAP);
4155         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4156         case 0:
4157         default:
4158                 numchan = 1;
4159                 break;
4160         case 1:
4161                 numchan = 2;
4162                 break;
4163         case 2:
4164                 numchan = 4;
4165                 break;
4166         case 3:
4167                 numchan = 8;
4168                 break;
4169         case 4:
4170                 numchan = 3;
4171                 break;
4172         case 5:
4173                 numchan = 6;
4174                 break;
4175         case 6:
4176                 numchan = 10;
4177                 break;
4178         case 7:
4179                 numchan = 12;
4180                 break;
4181         case 8:
4182                 numchan = 16;
4183                 break;
4184         }
4185         rdev->mc.vram_width = numchan * chansize;
4186         /* Could aper size report 0? */
4187         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4188         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4189         /* size in MB on CIK */
4190         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4191         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4192         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4193         si_vram_gtt_location(rdev, &rdev->mc);
4194         radeon_update_bandwidth_info(rdev);
4195
4196         return 0;
4197 }
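
/*
 * Editor's note: a worked example of the bus width calculation above,
 * with illustrative register values: if MC_ARB_RAMCFG has CHANSIZE
 * set (64-bit channels) and the NOOFCHAN field of MC_SHARED_CHMAP
 * reads 2 (4 channels), then vram_width = 4 * 64 = 256 bits.
 */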
4198
4199 /*
4200  * GART
4201  * VMID 0 is the physical GPU addresses as used by the kernel.
4202  * VMIDs 1-15 are used for userspace clients and are handled
4203  * by the radeon vm/hsa code.
4204  */
4205 /**
4206  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4207  *
4208  * @rdev: radeon_device pointer
4209  *
4210  * Flush the TLB for the VMID 0 page table (CIK).
4211  */
4212 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4213 {
4214         /* flush hdp cache */
4215         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4216
4217         /* bits 0-15 are the VM contexts 0-15 */
4218         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4219 }
4220
4221 /**
4222  * cik_pcie_gart_enable - gart enable
4223  *
4224  * @rdev: radeon_device pointer
4225  *
4226  * This sets up the TLBs, programs the page tables for VMID0,
4227  * sets up the hw for VMIDs 1-15 which are allocated on
4228  * demand, and sets up the global locations for the LDS, GDS,
4229  * and GPUVM for FSA64 clients (CIK).
4230  * Returns 0 for success, errors for failure.
4231  */
4232 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4233 {
4234         int r, i;
4235
4236         if (rdev->gart.robj == NULL) {
4237                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4238                 return -EINVAL;
4239         }
4240         r = radeon_gart_table_vram_pin(rdev);
4241         if (r)
4242                 return r;
4243         radeon_gart_restore(rdev);
4244         /* Setup TLB control */
4245         WREG32(MC_VM_MX_L1_TLB_CNTL,
4246                (0xA << 7) |
4247                ENABLE_L1_TLB |
4248                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4249                ENABLE_ADVANCED_DRIVER_MODEL |
4250                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4251         /* Setup L2 cache */
4252         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4253                ENABLE_L2_FRAGMENT_PROCESSING |
4254                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4255                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4256                EFFECTIVE_L2_QUEUE_SIZE(7) |
4257                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4258         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4259         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4260                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4261         /* setup context0 */
4262         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4263         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4264         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4265         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4266                         (u32)(rdev->dummy_page.addr >> 12));
4267         WREG32(VM_CONTEXT0_CNTL2, 0);
4268         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4269                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4270
4271         WREG32(0x15D4, 0);
4272         WREG32(0x15D8, 0);
4273         WREG32(0x15DC, 0);
4274
4275         /* empty context1-15 */
4276         /* FIXME start with 4G, once using 2 level pt switch to full
4277          * vm size space
4278          */
4279         /* set vm size, must be a multiple of 4 */
4280         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4281         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4282         for (i = 1; i < 16; i++) {
4283                 if (i < 8)
4284                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4285                                rdev->gart.table_addr >> 12);
4286                 else
4287                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4288                                rdev->gart.table_addr >> 12);
4289         }
4290
4291         /* enable context1-15 */
4292         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4293                (u32)(rdev->dummy_page.addr >> 12));
4294         WREG32(VM_CONTEXT1_CNTL2, 4);
4295         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4296                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4298                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4300                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4301                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4302                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4303                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4304                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4305                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4306                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4307                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4308
4309         /* TC cache setup ??? */
4310         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4311         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4312         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4313
4314         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4315         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4316         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4317         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4318         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4319
4320         WREG32(TC_CFG_L1_VOLATILE, 0);
4321         WREG32(TC_CFG_L2_VOLATILE, 0);
4322
4323         if (rdev->family == CHIP_KAVERI) {
4324                 u32 tmp = RREG32(CHUB_CONTROL);
4325                 tmp &= ~BYPASS_VM;
4326                 WREG32(CHUB_CONTROL, tmp);
4327         }
4328
4329         /* XXX SH_MEM regs */
4330         /* where to put LDS, scratch, GPUVM in FSA64 space */
4331         mutex_lock(&rdev->srbm_mutex);
4332         for (i = 0; i < 16; i++) {
4333                 cik_srbm_select(rdev, 0, 0, 0, i);
4334                 /* CP and shaders */
4335                 WREG32(SH_MEM_CONFIG, 0);
4336                 WREG32(SH_MEM_APE1_BASE, 1);
4337                 WREG32(SH_MEM_APE1_LIMIT, 0);
4338                 WREG32(SH_MEM_BASES, 0);
4339                 /* SDMA GFX */
4340                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4341                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4342                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4343                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4344                 /* XXX SDMA RLC - todo */
4345         }
4346         cik_srbm_select(rdev, 0, 0, 0, 0);
4347         mutex_unlock(&rdev->srbm_mutex);
4348
4349         cik_pcie_gart_tlb_flush(rdev);
4350         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4351                  (unsigned)(rdev->mc.gtt_size >> 20),
4352                  (unsigned long long)rdev->gart.table_addr);
4353         rdev->gart.ready = true;
4354         return 0;
4355 }
4356
4357 /**
4358  * cik_pcie_gart_disable - gart disable
4359  *
4360  * @rdev: radeon_device pointer
4361  *
4362  * Disables all VM page tables (CIK).
4363  */
4364 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4365 {
4366         /* Disable all tables */
4367         WREG32(VM_CONTEXT0_CNTL, 0);
4368         WREG32(VM_CONTEXT1_CNTL, 0);
4369         /* Setup TLB control */
4370         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4371                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4372         /* Setup L2 cache */
4373         WREG32(VM_L2_CNTL,
4374                ENABLE_L2_FRAGMENT_PROCESSING |
4375                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4376                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4377                EFFECTIVE_L2_QUEUE_SIZE(7) |
4378                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4379         WREG32(VM_L2_CNTL2, 0);
4380         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4381                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4382         radeon_gart_table_vram_unpin(rdev);
4383 }
4384
4385 /**
4386  * cik_pcie_gart_fini - vm fini callback
4387  *
4388  * @rdev: radeon_device pointer
4389  *
4390  * Tears down the driver GART/VM setup (CIK).
4391  */
4392 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4393 {
4394         cik_pcie_gart_disable(rdev);
4395         radeon_gart_table_vram_free(rdev);
4396         radeon_gart_fini(rdev);
4397 }
4398
4399 /* vm parser */
4400 /**
4401  * cik_ib_parse - vm ib_parse callback
4402  *
4403  * @rdev: radeon_device pointer
4404  * @ib: indirect buffer pointer
4405  *
4406  * CIK uses hw IB checking so this is a nop (CIK).
4407  */
4408 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4409 {
4410         return 0;
4411 }
4412
4413 /*
4414  * vm
4415  * VMID 0 is the physical GPU addresses as used by the kernel.
4416  * VMIDs 1-15 are used for userspace clients and are handled
4417  * by the radeon vm/hsa code.
4418  */
4419 /**
4420  * cik_vm_init - cik vm init callback
4421  *
4422  * @rdev: radeon_device pointer
4423  *
4424  * Inits cik specific vm parameters (number of VMs, base of vram for
4425  * VMIDs 1-15) (CIK).
4426  * Returns 0 for success.
4427  */
4428 int cik_vm_init(struct radeon_device *rdev)
4429 {
4430         /* number of VMs */
4431         rdev->vm_manager.nvm = 16;
4432         /* base offset of vram pages */
4433         if (rdev->flags & RADEON_IS_IGP) {
4434                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4435                 tmp <<= 22;
4436                 rdev->vm_manager.vram_base_offset = tmp;
4437         } else {
4438                 rdev->vm_manager.vram_base_offset = 0;
        }
4439
4440         return 0;
4441 }
4442
4443 /**
4444  * cik_vm_fini - cik vm fini callback
4445  *
4446  * @rdev: radeon_device pointer
4447  *
4448  * Tear down any asic specific VM setup (CIK).
4449  */
4450 void cik_vm_fini(struct radeon_device *rdev)
4451 {
4452 }
4453
4454 /**
4455  * cik_vm_decode_fault - print human readable fault info
4456  *
4457  * @rdev: radeon_device pointer
4458  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4459  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4460  *
4461  * Print human readable fault information (CIK).
4462  */
4463 static void cik_vm_decode_fault(struct radeon_device *rdev,
4464                                 u32 status, u32 addr, u32 mc_client)
4465 {
4466         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4467         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4468         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4469         char *block = (char *)&mc_client;
4470
4471         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4472                protections, vmid, addr,
4473                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4474                block, mc_id);
4475 }
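
/*
 * Editor's note: the (char *)&mc_client cast above reinterprets the
 * MCCLIENT register value as an ASCII tag.  Illustrative
 * (hypothetical) example: the value 0x31414D44 is stored on a
 * little-endian host as the bytes 'D' 'M' 'A' '1', so the fault is
 * reported as coming from "DMA1".
 */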
4476
4477 /**
4478  * cik_vm_flush - cik vm flush using the CP
4479  *
4480  * @rdev: radeon_device pointer
4481  *
4482  * Update the page table base and flush the VM TLB
4483  * using the CP (CIK).
4484  */
4485 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4486 {
4487         struct radeon_ring *ring = &rdev->ring[ridx];
4488
4489         if (vm == NULL)
4490                 return;
4491
4492         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4493         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4494                                  WRITE_DATA_DST_SEL(0)));
4495         if (vm->id < 8) {
4496                 radeon_ring_write(ring,
4497                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4498         } else {
4499                 radeon_ring_write(ring,
4500                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4501         }
4502         radeon_ring_write(ring, 0);
4503         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4504
4505         /* update SH_MEM_* regs */
4506         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4507         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4508                                  WRITE_DATA_DST_SEL(0)));
4509         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4510         radeon_ring_write(ring, 0);
4511         radeon_ring_write(ring, VMID(vm->id));
4512
4513         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4514         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4515                                  WRITE_DATA_DST_SEL(0)));
4516         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4517         radeon_ring_write(ring, 0);
4518
4519         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4520         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4521         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4522         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4523
4524         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4525         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4526                                  WRITE_DATA_DST_SEL(0)));
4527         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4528         radeon_ring_write(ring, 0);
4529         radeon_ring_write(ring, VMID(0));
4530
4531         /* HDP flush */
4532         /* We should be using the WAIT_REG_MEM packet here like in
4533          * cik_fence_ring_emit(), but it causes the CP to hang in this
4534          * context...
4535          */
4536         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4537         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4538                                  WRITE_DATA_DST_SEL(0)));
4539         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4540         radeon_ring_write(ring, 0);
4541         radeon_ring_write(ring, 0);
4542
4543         /* bits 0-15 are the VM contexts 0-15 */
4544         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4545         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4546                                  WRITE_DATA_DST_SEL(0)));
4547         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4548         radeon_ring_write(ring, 0);
4549         radeon_ring_write(ring, 1 << vm->id);
4550
4551         /* compute doesn't have PFP */
4552         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4553                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4554                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4555                 radeon_ring_write(ring, 0x0);
4556         }
4557 }
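
/*
 * Editor's note: a worked example of the VMID register split above.
 * The per-VM page table base registers sit in two banks of eight:
 * VMIDs 0-7 index off VM_CONTEXT0_PAGE_TABLE_BASE_ADDR and VMIDs 8-15
 * off VM_CONTEXT8_PAGE_TABLE_BASE_ADDR.  For vm->id == 10 the write
 * lands at VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (2 << 2), the third
 * register of the second bank.
 */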
4558
4559 /**
4560  * cik_vm_set_page - update the page tables using CP or sDMA
4561  *
4562  * @rdev: radeon_device pointer
4563  * @ib: indirect buffer to fill with commands
4564  * @pe: addr of the page entry
4565  * @addr: dst addr to write into pe
4566  * @count: number of page entries to update
4567  * @incr: increase next addr by incr bytes
4568  * @flags: access flags
4569  *
4570  * Update the page tables using CP or sDMA (CIK).
4571  */
4572 void cik_vm_set_page(struct radeon_device *rdev,
4573                      struct radeon_ib *ib,
4574                      uint64_t pe,
4575                      uint64_t addr, unsigned count,
4576                      uint32_t incr, uint32_t flags)
4577 {
4578         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4579         uint64_t value;
4580         unsigned ndw;
4581
4582         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4583                 /* CP */
4584                 while (count) {
4585                         ndw = 2 + count * 2;
4586                         if (ndw > 0x3FFE)
4587                                 ndw = 0x3FFE;
4588
4589                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4590                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4591                                                     WRITE_DATA_DST_SEL(1));
4592                         ib->ptr[ib->length_dw++] = pe;
4593                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4594                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4595                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4596                                         value = radeon_vm_map_gart(rdev, addr);
4597                                         value &= 0xFFFFFFFFFFFFF000ULL;
4598                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4599                                         value = addr;
4600                                 } else {
4601                                         value = 0;
4602                                 }
4603                                 addr += incr;
4604                                 value |= r600_flags;
4605                                 ib->ptr[ib->length_dw++] = value;
4606                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4607                         }
4608                 }
4609         } else {
4610                 /* DMA */
4611                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4612                         while (count) {
4613                                 ndw = count * 2;
4614                                 if (ndw > 0xFFFFE)
4615                                         ndw = 0xFFFFE;
4616
4617                                 /* for non-physically contiguous pages (system) */
4618                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4619                                 ib->ptr[ib->length_dw++] = pe;
4620                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4621                                 ib->ptr[ib->length_dw++] = ndw;
4622                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4623                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4624                                                 value = radeon_vm_map_gart(rdev, addr);
4625                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4626                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4627                                                 value = addr;
4628                                         } else {
4629                                                 value = 0;
4630                                         }
4631                                         addr += incr;
4632                                         value |= r600_flags;
4633                                         ib->ptr[ib->length_dw++] = value;
4634                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4635                                 }
4636                         }
4637                 } else {
4638                         while (count) {
4639                                 ndw = count;
4640                                 if (ndw > 0x7FFFF)
4641                                         ndw = 0x7FFFF;
4642
4643                                 if (flags & RADEON_VM_PAGE_VALID)
4644                                         value = addr;
4645                                 else
4646                                         value = 0;
4647                                 /* for physically contiguous pages (vram) */
4648                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4649                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4650                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4651                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4652                                 ib->ptr[ib->length_dw++] = 0;
4653                                 ib->ptr[ib->length_dw++] = value; /* value */
4654                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4655                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4656                                 ib->ptr[ib->length_dw++] = 0;
4657                                 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4658                                 pe += ndw * 8;
4659                                 addr += ndw * incr;
4660                                 count -= ndw;
4661                         }
4662                 }
4663                 while (ib->length_dw & 0x7)
4664                         ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4665         }
4666 }
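
/*
 * Editor's note: worked examples of the packet limits above.  The CP
 * WRITE_DATA path emits 2 + 2 * count dwords, clamped to 0x3FFE, so
 * one packet updates at most (0x3FFE - 2) / 2 = 8190 entries; the
 * sDMA GENERATE_PTE_PDE path is clamped to 0x7FFFF entries directly.
 * The trailing NOP loop pads the IB to a multiple of 8 dwords, the
 * alignment the sDMA engine expects for IBs.
 */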
4667
4668 /**
4669  * cik_dma_vm_flush - cik vm flush using sDMA
4670  *
4671  * @rdev: radeon_device pointer
4672  *
4673  * Update the page table base and flush the VM TLB
4674  * using sDMA (CIK).
4675  */
4676 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4677 {
4678         struct radeon_ring *ring = &rdev->ring[ridx];
4679         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4680                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4681         u32 ref_and_mask;
4682
4683         if (vm == NULL)
4684                 return;
4685
4686         if (ridx == R600_RING_TYPE_DMA_INDEX)
4687                 ref_and_mask = SDMA0;
4688         else
4689                 ref_and_mask = SDMA1;
4690
4691         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4692         if (vm->id < 8) {
4693                 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4694         } else {
4695                 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4696         }
4697         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4698
4699         /* update SH_MEM_* regs */
4700         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4701         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4702         radeon_ring_write(ring, VMID(vm->id));
4703
4704         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4705         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4706         radeon_ring_write(ring, 0);
4707
4708         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4709         radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4710         radeon_ring_write(ring, 0);
4711
4712         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4713         radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4714         radeon_ring_write(ring, 1);
4715
4716         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4717         radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4718         radeon_ring_write(ring, 0);
4719
4720         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4721         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4722         radeon_ring_write(ring, VMID(0));
4723
4724         /* flush HDP */
4725         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4726         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4727         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4728         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4729         radeon_ring_write(ring, ref_and_mask); /* MASK */
4730         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4731
4732         /* flush TLB */
4733         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4734         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4735         radeon_ring_write(ring, 1 << vm->id);
4736 }
4737
4738 /*
4739  * RLC
4740  * The RLC is a multi-purpose microengine that handles a
4741  * variety of functions, the most important of which is
4742  * the interrupt controller.
4743  */
4744 /**
4745  * cik_rlc_stop - stop the RLC ME
4746  *
4747  * @rdev: radeon_device pointer
4748  *
4749  * Halt the RLC ME (MicroEngine) (CIK).
4750  */
4751 static void cik_rlc_stop(struct radeon_device *rdev)
4752 {
4753         int i, j, k;
4754         u32 mask, tmp;
4755
4756         tmp = RREG32(CP_INT_CNTL_RING0);
4757         tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4758         WREG32(CP_INT_CNTL_RING0, tmp);
4759
4760         RREG32(CB_CGTT_SCLK_CTRL);
4761         RREG32(CB_CGTT_SCLK_CTRL);
4762         RREG32(CB_CGTT_SCLK_CTRL);
4763         RREG32(CB_CGTT_SCLK_CTRL);
4764
4765         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4766         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4767
4768         WREG32(RLC_CNTL, 0);
4769
4770         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4771                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4772                         cik_select_se_sh(rdev, i, j);
4773                         for (k = 0; k < rdev->usec_timeout; k++) {
4774                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4775                                         break;
4776                                 udelay(1);
4777                         }
4778                 }
4779         }
4780         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4781
4782         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4783         for (k = 0; k < rdev->usec_timeout; k++) {
4784                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4785                         break;
4786                 udelay(1);
4787         }
4788 }
4789
4790 /**
4791  * cik_rlc_start - start the RLC ME
4792  *
4793  * @rdev: radeon_device pointer
4794  *
4795  * Unhalt the RLC ME (MicroEngine) (CIK).
4796  */
4797 static void cik_rlc_start(struct radeon_device *rdev)
4798 {
4799         u32 tmp;
4800
4801         WREG32(RLC_CNTL, RLC_ENABLE);
4802
4803         tmp = RREG32(CP_INT_CNTL_RING0);
4804         tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4805         WREG32(CP_INT_CNTL_RING0, tmp);
4806
4807         udelay(50);
4808 }
4809
4810 /**
4811  * cik_rlc_resume - setup the RLC hw
4812  *
4813  * @rdev: radeon_device pointer
4814  *
4815  * Initialize the RLC registers, load the ucode,
4816  * and start the RLC (CIK).
4817  * Returns 0 for success, -EINVAL if the ucode is not available.
4818  */
4819 static int cik_rlc_resume(struct radeon_device *rdev)
4820 {
4821         u32 i, size;
4822         u32 clear_state_info[3];
4823         const __be32 *fw_data;
4824
4825         if (!rdev->rlc_fw)
4826                 return -EINVAL;
4827
4828         switch (rdev->family) {
4829         case CHIP_BONAIRE:
4830         default:
4831                 size = BONAIRE_RLC_UCODE_SIZE;
4832                 break;
4833         case CHIP_KAVERI:
4834                 size = KV_RLC_UCODE_SIZE;
4835                 break;
4836         case CHIP_KABINI:
4837                 size = KB_RLC_UCODE_SIZE;
4838                 break;
4839         }
4840
4841         cik_rlc_stop(rdev);
4842
4843         WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4844         RREG32(GRBM_SOFT_RESET);
4845         udelay(50);
4846         WREG32(GRBM_SOFT_RESET, 0);
4847         RREG32(GRBM_SOFT_RESET);
4848         udelay(50);
4849
4850         WREG32(RLC_LB_CNTR_INIT, 0);
4851         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4852
4853         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4854         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4855         WREG32(RLC_LB_PARAMS, 0x00600408);
4856         WREG32(RLC_LB_CNTL, 0x80000004);
4857
4858         WREG32(RLC_MC_CNTL, 0);
4859         WREG32(RLC_UCODE_CNTL, 0);
4860
4861         fw_data = (const __be32 *)rdev->rlc_fw->data;
4862         WREG32(RLC_GPM_UCODE_ADDR, 0);
4863         for (i = 0; i < size; i++)
4864                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4865         WREG32(RLC_GPM_UCODE_ADDR, 0);
4866
4867         /* XXX */
4868         clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
4869         clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
4870         clear_state_info[2] = 0; /* cik_default_size */
4871         WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4872         for (i = 0; i < 3; i++)
4873                 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4874         WREG32(RLC_DRIVER_DMA_STATUS, 0);
4875
4876         cik_rlc_start(rdev);
4877
4878         return 0;
4879 }
4880
4881 /*
4882  * Interrupts
4883  * Starting with r6xx, interrupts are handled via a ring buffer.
4884  * Ring buffers are areas of GPU accessible memory that the GPU
4885  * writes interrupt vectors into and the host reads vectors out of.
4886  * There is a rptr (read pointer) that determines where the
4887  * host is currently reading, and a wptr (write pointer)
4888  * which determines where the GPU has written.  When the
4889  * pointers are equal, the ring is idle.  When the GPU
4890  * writes vectors to the ring buffer, it increments the
4891  * wptr.  When there is an interrupt, the host then starts
4892  * fetching vectors and processing them until the pointers are
4893  * equal again at which point it updates the rptr.
4894  */
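
/*
 * Editor's note: a minimal sketch of the consumer side described
 * above.  It is illustrative only (a hypothetical helper, not part of
 * the original driver); the real handler is cik_irq_process(), and
 * cik_get_ih_wptr() is assumed visible here (it is defined later in
 * this file).
 */
static void cik_ih_consume_example(struct radeon_device *rdev)
{
        u32 wptr = cik_get_ih_wptr(rdev);       /* where the GPU has written */
        u32 rptr = rdev->ih.rptr;               /* where the host has read */
        u32 src_id, src_data;

        while (rptr != wptr) {
                /* each CIK IH vector is 16 bytes: 4 little-endian dwords */
                src_id   = le32_to_cpu(rdev->ih.ring[(rptr >> 2) + 0]) & 0xff;
                src_data = le32_to_cpu(rdev->ih.ring[(rptr >> 2) + 1]) & 0xfffffff;
                /* ... dispatch on src_id/src_data here ... */
                rptr = (rptr + 16) & rdev->ih.ptr_mask;
        }
        /* publish the new rptr so the GPU sees the host has caught up */
        rdev->ih.rptr = rptr;
        WREG32(IH_RB_RPTR, rdev->ih.rptr);
}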
4895
4896 /**
4897  * cik_enable_interrupts - Enable the interrupt ring buffer
4898  *
4899  * @rdev: radeon_device pointer
4900  *
4901  * Enable the interrupt ring buffer (CIK).
4902  */
4903 static void cik_enable_interrupts(struct radeon_device *rdev)
4904 {
4905         u32 ih_cntl = RREG32(IH_CNTL);
4906         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4907
4908         ih_cntl |= ENABLE_INTR;
4909         ih_rb_cntl |= IH_RB_ENABLE;
4910         WREG32(IH_CNTL, ih_cntl);
4911         WREG32(IH_RB_CNTL, ih_rb_cntl);
4912         rdev->ih.enabled = true;
4913 }
4914
4915 /**
4916  * cik_disable_interrupts - Disable the interrupt ring buffer
4917  *
4918  * @rdev: radeon_device pointer
4919  *
4920  * Disable the interrupt ring buffer (CIK).
4921  */
4922 static void cik_disable_interrupts(struct radeon_device *rdev)
4923 {
4924         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4925         u32 ih_cntl = RREG32(IH_CNTL);
4926
4927         ih_rb_cntl &= ~IH_RB_ENABLE;
4928         ih_cntl &= ~ENABLE_INTR;
4929         WREG32(IH_RB_CNTL, ih_rb_cntl);
4930         WREG32(IH_CNTL, ih_cntl);
4931         /* set rptr, wptr to 0 */
4932         WREG32(IH_RB_RPTR, 0);
4933         WREG32(IH_RB_WPTR, 0);
4934         rdev->ih.enabled = false;
4935         rdev->ih.rptr = 0;
4936 }
4937
4938 /**
4939  * cik_disable_interrupt_state - Disable all interrupt sources
4940  *
4941  * @rdev: radeon_device pointer
4942  *
4943  * Clear all interrupt enable bits used by the driver (CIK).
4944  */
4945 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4946 {
4947         u32 tmp;
4948
4949         /* gfx ring */
4950         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4951         /* sdma */
4952         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4953         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4954         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4955         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4956         /* compute queues */
4957         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4958         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4959         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4960         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4961         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4962         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4963         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4964         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4965         /* grbm */
4966         WREG32(GRBM_INT_CNTL, 0);
4967         /* vline/vblank, etc. */
4968         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4969         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4970         if (rdev->num_crtc >= 4) {
4971                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4972                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4973         }
4974         if (rdev->num_crtc >= 6) {
4975                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4976                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4977         }
4978
4979         /* dac hotplug */
4980         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4981
4982         /* digital hotplug */
4983         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4984         WREG32(DC_HPD1_INT_CONTROL, tmp);
4985         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4986         WREG32(DC_HPD2_INT_CONTROL, tmp);
4987         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4988         WREG32(DC_HPD3_INT_CONTROL, tmp);
4989         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4990         WREG32(DC_HPD4_INT_CONTROL, tmp);
4991         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4992         WREG32(DC_HPD5_INT_CONTROL, tmp);
4993         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4994         WREG32(DC_HPD6_INT_CONTROL, tmp);
4995
4996 }
4997
4998 /**
4999  * cik_irq_init - init and enable the interrupt ring
5000  *
5001  * @rdev: radeon_device pointer
5002  *
5003  * Allocate a ring buffer for the interrupt controller,
5004  * enable the RLC, disable interrupts, then set up and
5005  * enable the IH ring buffer (CIK).
5006  * Called at device load and resume.
5007  * Returns 0 for success, errors for failure.
5008  */
5009 static int cik_irq_init(struct radeon_device *rdev)
5010 {
5011         int ret = 0;
5012         int rb_bufsz;
5013         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5014
5015         /* allocate ring */
5016         ret = r600_ih_ring_alloc(rdev);
5017         if (ret)
5018                 return ret;
5019
5020         /* disable irqs */
5021         cik_disable_interrupts(rdev);
5022
5023         /* init rlc */
5024         ret = cik_rlc_resume(rdev);
5025         if (ret) {
5026                 r600_ih_ring_fini(rdev);
5027                 return ret;
5028         }
5029
5030         /* setup interrupt control */
5031         /* XXX this should actually be a bus address, not an MC address. same on older asics */
5032         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5033         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5034         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5035          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5036          */
5037         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5038         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5039         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5040         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5041
5042         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
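        /* the RB_SIZE field of IH_RB_CNTL takes log2 of the ring size in
         * dwords; ring_size is in bytes, hence the divide by 4 before
         * drm_order() */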
5043         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5044
5045         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5046                       IH_WPTR_OVERFLOW_CLEAR |
5047                       (rb_bufsz << 1));
5048
5049         if (rdev->wb.enabled)
5050                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5051
5052         /* set the writeback address whether it's enabled or not */
5053         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5054         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
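        /* only the high 8 bits (39:32) of the wptr writeback address fit
         * in IH_RB_WPTR_ADDR_HI, hence the & 0xFF */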
5055
5056         WREG32(IH_RB_CNTL, ih_rb_cntl);
5057
5058         /* set rptr, wptr to 0 */
5059         WREG32(IH_RB_RPTR, 0);
5060         WREG32(IH_RB_WPTR, 0);
5061
5062         /* Default settings for IH_CNTL (disabled at first) */
5063         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5064         /* RPTR_REARM only works if MSIs are enabled */
5065         if (rdev->msi_enabled)
5066                 ih_cntl |= RPTR_REARM;
5067         WREG32(IH_CNTL, ih_cntl);
5068
5069         /* force the active interrupt state to all disabled */
5070         cik_disable_interrupt_state(rdev);
5071
5072         pci_set_master(rdev->pdev);
5073
5074         /* enable irqs */
5075         cik_enable_interrupts(rdev);
5076
5077         return ret;
5078 }
5079
5080 /**
5081  * cik_irq_set - enable/disable interrupt sources
5082  *
5083  * @rdev: radeon_device pointer
5084  *
5085  * Enable interrupt sources on the GPU (vblanks, hpd,
5086  * etc.) (CIK).
5087  * Returns 0 for success, errors for failure.
5088  */
5089 int cik_irq_set(struct radeon_device *rdev)
5090 {
5091         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5092                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5093         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5094         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5095         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5096         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5097         u32 grbm_int_cntl = 0;
5098         u32 dma_cntl, dma_cntl1;
5099
5100         if (!rdev->irq.installed) {
5101                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5102                 return -EINVAL;
5103         }
5104         /* don't enable anything if the ih is disabled */
5105         if (!rdev->ih.enabled) {
5106                 cik_disable_interrupts(rdev);
5107                 /* force the active interrupt state to all disabled */
5108                 cik_disable_interrupt_state(rdev);
5109                 return 0;
5110         }
5111
5112         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5113         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5114         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5115         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5116         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5117         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5118
5119         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5120         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5121
5122         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5125         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5126         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5127         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5128         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5129         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5130
5131         /* enable CP interrupts on all rings */
5132         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5133                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5134                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5135         }
5136         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5137                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5138                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5139                 if (ring->me == 1) {
5140                         switch (ring->pipe) {
5141                         case 0:
5142                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5143                                 break;
5144                         case 1:
5145                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5146                                 break;
5147                         case 2:
5148                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5149                                 break;
5150                         case 3:
5151                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5152                                 break;
5153                         default:
5154                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5155                                 break;
5156                         }
5157                 } else if (ring->me == 2) {
5158                         switch (ring->pipe) {
5159                         case 0:
5160                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5161                                 break;
5162                         case 1:
5163                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5164                                 break;
5165                         case 2:
5166                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5167                                 break;
5168                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5169                                 break;
5170                         default:
5171                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5172                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5173                                 break;
5174                         }
5175                 } else {
5176                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5177                 }
5178         }
5179         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5180                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5181                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5182                 if (ring->me == 1) {
5183                         switch (ring->pipe) {
5184                         case 0:
5185                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5186                                 break;
5187                         case 1:
5188                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5189                                 break;
5190                         case 2:
5191                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5192                                 break;
5193                         case 3:
5194                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5195                                 break;
5196                         default:
5197                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5198                                 break;
5199                         }
5200                 } else if (ring->me == 2) {
5201                         switch (ring->pipe) {
5202                         case 0:
5203                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5204                                 break;
5205                         case 1:
5206                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5207                                 break;
5208                         case 2:
5209                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5210                                 break;
5211                         case 3:
5212                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5213                                 break;
5214                         default:
5215                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5216                                 break;
5217                         }
5218                 } else {
5219                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5220                 }
5221         }
5222
5223         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5224                 DRM_DEBUG("cik_irq_set: sw int dma\n");
5225                 dma_cntl |= TRAP_ENABLE;
5226         }
5227
5228         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5229                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5230                 dma_cntl1 |= TRAP_ENABLE;
5231         }
5232
5233         if (rdev->irq.crtc_vblank_int[0] ||
5234             atomic_read(&rdev->irq.pflip[0])) {
5235                 DRM_DEBUG("cik_irq_set: vblank 0\n");
5236                 crtc1 |= VBLANK_INTERRUPT_MASK;
5237         }
5238         if (rdev->irq.crtc_vblank_int[1] ||
5239             atomic_read(&rdev->irq.pflip[1])) {
5240                 DRM_DEBUG("cik_irq_set: vblank 1\n");
5241                 crtc2 |= VBLANK_INTERRUPT_MASK;
5242         }
5243         if (rdev->irq.crtc_vblank_int[2] ||
5244             atomic_read(&rdev->irq.pflip[2])) {
5245                 DRM_DEBUG("cik_irq_set: vblank 2\n");
5246                 crtc3 |= VBLANK_INTERRUPT_MASK;
5247         }
5248         if (rdev->irq.crtc_vblank_int[3] ||
5249             atomic_read(&rdev->irq.pflip[3])) {
5250                 DRM_DEBUG("cik_irq_set: vblank 3\n");
5251                 crtc4 |= VBLANK_INTERRUPT_MASK;
5252         }
5253         if (rdev->irq.crtc_vblank_int[4] ||
5254             atomic_read(&rdev->irq.pflip[4])) {
5255                 DRM_DEBUG("cik_irq_set: vblank 4\n");
5256                 crtc5 |= VBLANK_INTERRUPT_MASK;
5257         }
5258         if (rdev->irq.crtc_vblank_int[5] ||
5259             atomic_read(&rdev->irq.pflip[5])) {
5260                 DRM_DEBUG("cik_irq_set: vblank 5\n");
5261                 crtc6 |= VBLANK_INTERRUPT_MASK;
5262         }
5263         if (rdev->irq.hpd[0]) {
5264                 DRM_DEBUG("cik_irq_set: hpd 1\n");
5265                 hpd1 |= DC_HPDx_INT_EN;
5266         }
5267         if (rdev->irq.hpd[1]) {
5268                 DRM_DEBUG("cik_irq_set: hpd 2\n");
5269                 hpd2 |= DC_HPDx_INT_EN;
5270         }
5271         if (rdev->irq.hpd[2]) {
5272                 DRM_DEBUG("cik_irq_set: hpd 3\n");
5273                 hpd3 |= DC_HPDx_INT_EN;
5274         }
5275         if (rdev->irq.hpd[3]) {
5276                 DRM_DEBUG("cik_irq_set: hpd 4\n");
5277                 hpd4 |= DC_HPDx_INT_EN;
5278         }
5279         if (rdev->irq.hpd[4]) {
5280                 DRM_DEBUG("cik_irq_set: hpd 5\n");
5281                 hpd5 |= DC_HPDx_INT_EN;
5282         }
5283         if (rdev->irq.hpd[5]) {
5284                 DRM_DEBUG("cik_irq_set: hpd 6\n");
5285                 hpd6 |= DC_HPDx_INT_EN;
5286         }
5287
5288         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5289
5290         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5291         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5292
5293         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5294         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5295         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5296         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5297         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5298         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5299         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5300         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5301
5302         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5303
5304         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5305         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5306         if (rdev->num_crtc >= 4) {
5307                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5308                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5309         }
5310         if (rdev->num_crtc >= 6) {
5311                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5312                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5313         }
5314
5315         WREG32(DC_HPD1_INT_CONTROL, hpd1);
5316         WREG32(DC_HPD2_INT_CONTROL, hpd2);
5317         WREG32(DC_HPD3_INT_CONTROL, hpd3);
5318         WREG32(DC_HPD4_INT_CONTROL, hpd4);
5319         WREG32(DC_HPD5_INT_CONTROL, hpd5);
5320         WREG32(DC_HPD6_INT_CONTROL, hpd6);
5321
5322         return 0;
5323 }
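/* Note: cik_irq_set() above is normally reached via the radeon_irq_kms
 * helpers, which update the bookkeeping it reads (irq.ring_int[],
 * irq.crtc_vblank_int[], irq.hpd[], ...) before asking the asic to
 * mirror that state into the hardware enable bits.
 */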
5324
5325 /**
5326  * cik_irq_ack - ack interrupt sources
5327  *
5328  * @rdev: radeon_device pointer
5329  *
5330  * Ack interrupt sources on the GPU (vblanks, hpd,
5331  * etc.) (CIK).  Certain interrupt sources are sw
5332  * generated and do not require an explicit ack.
5333  */
5334 static inline void cik_irq_ack(struct radeon_device *rdev)
5335 {
5336         u32 tmp;
5337
5338         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5339         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5340         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5341         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5342         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5343         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5344         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5345
5346         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5347                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5348         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5349                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5350         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5351                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5352         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5353                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5354
5355         if (rdev->num_crtc >= 4) {
5356                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5357                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5358                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5359                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5360                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5361                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5362                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5363                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5364         }
5365
5366         if (rdev->num_crtc >= 6) {
5367                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5368                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5369                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5370                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5371                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5372                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5373                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5374                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5375         }
5376
5377         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5378                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5379                 tmp |= DC_HPDx_INT_ACK;
5380                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5381         }
5382         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5383                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5384                 tmp |= DC_HPDx_INT_ACK;
5385                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5386         }
5387         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5388                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5389                 tmp |= DC_HPDx_INT_ACK;
5390                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5391         }
5392         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5393                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5394                 tmp |= DC_HPDx_INT_ACK;
5395                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5396         }
5397         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5398                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5399                 tmp |= DC_HPDx_INT_ACK;
5400                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5401         }
5402         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5403                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5404                 tmp |= DC_HPDx_INT_ACK;
5405                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5406         }
5407 }
5408
5409 /**
5410  * cik_irq_disable - disable interrupts
5411  *
5412  * @rdev: radeon_device pointer
5413  *
5414  * Disable interrupts on the hw (CIK).
5415  */
5416 static void cik_irq_disable(struct radeon_device *rdev)
5417 {
5418         cik_disable_interrupts(rdev);
5419         /* Wait and acknowledge irq */
5420         mdelay(1);
5421         cik_irq_ack(rdev);
5422         cik_disable_interrupt_state(rdev);
5423 }
5424
5425 /**
5426  * cik_irq_suspend - disable interrupts for suspend
5427  *
5428  * @rdev: radeon_device pointer
5429  *
5430  * Disable interrupts and stop the RLC (CIK).
5431  * Used for suspend.
5432  */
5433 static void cik_irq_suspend(struct radeon_device *rdev)
5434 {
5435         cik_irq_disable(rdev);
5436         cik_rlc_stop(rdev);
5437 }
5438
5439 /**
5440  * cik_irq_fini - tear down interrupt support
5441  *
5442  * @rdev: radeon_device pointer
5443  *
5444  * Disable interrupts on the hw and free the IH ring
5445  * buffer (CIK).
5446  * Used for driver unload.
5447  */
5448 static void cik_irq_fini(struct radeon_device *rdev)
5449 {
5450         cik_irq_suspend(rdev);
5451         r600_ih_ring_fini(rdev);
5452 }
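/* Summarizing the kernel-doc comments above, the intended lifecycle of
 * the IH code is roughly:
 *
 *   load/resume:  cik_irq_init() then cik_irq_set()
 *   runtime:      cik_irq_process() once per interrupt
 *   suspend:      cik_irq_suspend()
 *   unload:       cik_irq_fini()
 */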
5453
5454 /**
5455  * cik_get_ih_wptr - get the IH ring buffer wptr
5456  *
5457  * @rdev: radeon_device pointer
5458  *
5459  * Get the IH ring buffer wptr from either the register
5460  * or the writeback memory buffer (CIK).  Also check for
5461  * ring buffer overflow and deal with it.
5462  * Used by cik_irq_process().
5463  * Returns the value of the wptr.
5464  */
5465 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5466 {
5467         u32 wptr, tmp;
5468
5469         if (rdev->wb.enabled)
5470                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5471         else
5472                 wptr = RREG32(IH_RB_WPTR);
5473
5474         if (wptr & RB_OVERFLOW) {
5475                 /* When a ring buffer overflow happens, start parsing interrupts
5476                  * from the last vector not yet overwritten (wptr + 16). Hopefully
5477                  * this allows us to catch up.
5478                  */
5479                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5480                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5481                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5482                 tmp = RREG32(IH_RB_CNTL);
5483                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5484                 WREG32(IH_RB_CNTL, tmp);
5485         }
5486         return (wptr & rdev->ih.ptr_mask);
5487 }
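/* Worked overflow example (assuming ptr_mask is ring_size - 1, as set
 * up by r600_ih_ring_alloc()): with a 64KB ring and wptr wrapped back
 * around to 0x20, the next GPU write lands at 0x20, so the oldest
 * intact vector is at (0x20 + 16) & 0xffff = 0x30, which is where
 * rptr is placed above.
 */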
5488
5489 /* CIK IV Ring
5490  * Each IV ring entry is 128 bits:
5491  * [7:0]    - interrupt source id
5492  * [31:8]   - reserved
5493  * [59:32]  - interrupt source data
5494  * [63:60]  - reserved
5495  * [71:64]  - RINGID
5496  *            CP:
5497  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5498  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5499  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5500  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5501  *            PIPE_ID - ME0 0=3D
5502  *                    - ME1&2 compute dispatcher (4 pipes each)
5503  *            SDMA:
5504  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5505  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5506  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5507  * [79:72]  - VMID
5508  * [95:80]  - PASID
5509  * [127:96] - reserved
5510  */
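/* A minimal decode helper for the layout above, using the same masks as
 * cik_irq_process() below (illustrative; the driver reads these fields
 * inline rather than through a helper like this):
 */
static inline void cik_decode_iv_entry(struct radeon_device *rdev, u32 ring_index,
				       u32 *src_id, u32 *src_data, u32 *ring_id)
{
	*src_id   = le32_to_cpu(rdev->ih.ring[ring_index])     & 0xff;      /* [7:0] */
	*src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; /* [59:32] */
	*ring_id  = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;      /* [71:64] */
}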
5511 /**
5512  * cik_irq_process - interrupt handler
5513  *
5514  * @rdev: radeon_device pointer
5515  *
5516  * Interrupt handler (CIK).  Walk the IH ring,
5517  * ack interrupts and schedule work to handle
5518  * interrupt events.
5519  * Returns irq process return code.
5520  */
5521 int cik_irq_process(struct radeon_device *rdev)
5522 {
5523         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5524         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5525         u32 wptr;
5526         u32 rptr;
5527         u32 src_id, src_data, ring_id;
5528         u8 me_id, pipe_id, queue_id;
5529         u32 ring_index;
5530         bool queue_hotplug = false;
5531         bool queue_reset = false;
5532         u32 addr, status, mc_client;
5533
5534         if (!rdev->ih.enabled || rdev->shutdown)
5535                 return IRQ_NONE;
5536
5537         wptr = cik_get_ih_wptr(rdev);
5538
5539 restart_ih:
5540         /* is somebody else already processing irqs? */
5541         if (atomic_xchg(&rdev->ih.lock, 1))
5542                 return IRQ_NONE;
5543
5544         rptr = rdev->ih.rptr;
5545         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5546
5547         /* Order reading of wptr vs. reading of IH ring data */
5548         rmb();
5549
5550         /* display interrupts */
5551         cik_irq_ack(rdev);
5552
5553         while (rptr != wptr) {
5554                 /* wptr/rptr are in bytes! */
5555                 ring_index = rptr / 4;
5556                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5557                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5558                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5559
5560                 switch (src_id) {
5561                 case 1: /* D1 vblank/vline */
5562                         switch (src_data) {
5563                         case 0: /* D1 vblank */
5564                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5565                                         if (rdev->irq.crtc_vblank_int[0]) {
5566                                                 drm_handle_vblank(rdev->ddev, 0);
5567                                                 rdev->pm.vblank_sync = true;
5568                                                 wake_up(&rdev->irq.vblank_queue);
5569                                         }
5570                                         if (atomic_read(&rdev->irq.pflip[0]))
5571                                                 radeon_crtc_handle_flip(rdev, 0);
5572                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5573                                         DRM_DEBUG("IH: D1 vblank\n");
5574                                 }
5575                                 break;
5576                         case 1: /* D1 vline */
5577                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5578                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5579                                         DRM_DEBUG("IH: D1 vline\n");
5580                                 }
5581                                 break;
5582                         default:
5583                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5584                                 break;
5585                         }
5586                         break;
5587                 case 2: /* D2 vblank/vline */
5588                         switch (src_data) {
5589                         case 0: /* D2 vblank */
5590                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5591                                         if (rdev->irq.crtc_vblank_int[1]) {
5592                                                 drm_handle_vblank(rdev->ddev, 1);
5593                                                 rdev->pm.vblank_sync = true;
5594                                                 wake_up(&rdev->irq.vblank_queue);
5595                                         }
5596                                         if (atomic_read(&rdev->irq.pflip[1]))
5597                                                 radeon_crtc_handle_flip(rdev, 1);
5598                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5599                                         DRM_DEBUG("IH: D2 vblank\n");
5600                                 }
5601                                 break;
5602                         case 1: /* D2 vline */
5603                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5604                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5605                                         DRM_DEBUG("IH: D2 vline\n");
5606                                 }
5607                                 break;
5608                         default:
5609                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5610                                 break;
5611                         }
5612                         break;
5613                 case 3: /* D3 vblank/vline */
5614                         switch (src_data) {
5615                         case 0: /* D3 vblank */
5616                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5617                                         if (rdev->irq.crtc_vblank_int[2]) {
5618                                                 drm_handle_vblank(rdev->ddev, 2);
5619                                                 rdev->pm.vblank_sync = true;
5620                                                 wake_up(&rdev->irq.vblank_queue);
5621                                         }
5622                                         if (atomic_read(&rdev->irq.pflip[2]))
5623                                                 radeon_crtc_handle_flip(rdev, 2);
5624                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5625                                         DRM_DEBUG("IH: D3 vblank\n");
5626                                 }
5627                                 break;
5628                         case 1: /* D3 vline */
5629                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5630                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5631                                         DRM_DEBUG("IH: D3 vline\n");
5632                                 }
5633                                 break;
5634                         default:
5635                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5636                                 break;
5637                         }
5638                         break;
5639                 case 4: /* D4 vblank/vline */
5640                         switch (src_data) {
5641                         case 0: /* D4 vblank */
5642                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5643                                         if (rdev->irq.crtc_vblank_int[3]) {
5644                                                 drm_handle_vblank(rdev->ddev, 3);
5645                                                 rdev->pm.vblank_sync = true;
5646                                                 wake_up(&rdev->irq.vblank_queue);
5647                                         }
5648                                         if (atomic_read(&rdev->irq.pflip[3]))
5649                                                 radeon_crtc_handle_flip(rdev, 3);
5650                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5651                                         DRM_DEBUG("IH: D4 vblank\n");
5652                                 }
5653                                 break;
5654                         case 1: /* D4 vline */
5655                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5656                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5657                                         DRM_DEBUG("IH: D4 vline\n");
5658                                 }
5659                                 break;
5660                         default:
5661                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5662                                 break;
5663                         }
5664                         break;
5665                 case 5: /* D5 vblank/vline */
5666                         switch (src_data) {
5667                         case 0: /* D5 vblank */
5668                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5669                                         if (rdev->irq.crtc_vblank_int[4]) {
5670                                                 drm_handle_vblank(rdev->ddev, 4);
5671                                                 rdev->pm.vblank_sync = true;
5672                                                 wake_up(&rdev->irq.vblank_queue);
5673                                         }
5674                                         if (atomic_read(&rdev->irq.pflip[4]))
5675                                                 radeon_crtc_handle_flip(rdev, 4);
5676                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5677                                         DRM_DEBUG("IH: D5 vblank\n");
5678                                 }
5679                                 break;
5680                         case 1: /* D5 vline */
5681                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5682                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5683                                         DRM_DEBUG("IH: D5 vline\n");
5684                                 }
5685                                 break;
5686                         default:
5687                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5688                                 break;
5689                         }
5690                         break;
5691                 case 6: /* D6 vblank/vline */
5692                         switch (src_data) {
5693                         case 0: /* D6 vblank */
5694                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5695                                         if (rdev->irq.crtc_vblank_int[5]) {
5696                                                 drm_handle_vblank(rdev->ddev, 5);
5697                                                 rdev->pm.vblank_sync = true;
5698                                                 wake_up(&rdev->irq.vblank_queue);
5699                                         }
5700                                         if (atomic_read(&rdev->irq.pflip[5]))
5701                                                 radeon_crtc_handle_flip(rdev, 5);
5702                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5703                                         DRM_DEBUG("IH: D6 vblank\n");
5704                                 }
5705                                 break;
5706                         case 1: /* D6 vline */
5707                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5708                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5709                                         DRM_DEBUG("IH: D6 vline\n");
5710                                 }
5711                                 break;
5712                         default:
5713                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5714                                 break;
5715                         }
5716                         break;
5717                 case 42: /* HPD hotplug */
5718                         switch (src_data) {
5719                         case 0:
5720                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5721                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5722                                         queue_hotplug = true;
5723                                         DRM_DEBUG("IH: HPD1\n");
5724                                 }
5725                                 break;
5726                         case 1:
5727                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5728                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5729                                         queue_hotplug = true;
5730                                         DRM_DEBUG("IH: HPD2\n");
5731                                 }
5732                                 break;
5733                         case 2:
5734                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5735                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5736                                         queue_hotplug = true;
5737                                         DRM_DEBUG("IH: HPD3\n");
5738                                 }
5739                                 break;
5740                         case 3:
5741                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5742                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5743                                         queue_hotplug = true;
5744                                         DRM_DEBUG("IH: HPD4\n");
5745                                 }
5746                                 break;
5747                         case 4:
5748                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5749                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5750                                         queue_hotplug = true;
5751                                         DRM_DEBUG("IH: HPD5\n");
5752                                 }
5753                                 break;
5754                         case 5:
5755                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5756                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5757                                         queue_hotplug = true;
5758                                         DRM_DEBUG("IH: HPD6\n");
5759                                 }
5760                                 break;
5761                         default:
5762                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5763                                 break;
5764                         }
5765                         break;
5766                 case 146:
5767                 case 147:
5768                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5769                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5770                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5771                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5772                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5773                                 addr);
5774                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5775                                 status);
5776                         cik_vm_decode_fault(rdev, status, addr, mc_client);
5777                         /* reset addr and status */
5778                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5779                         break;
5780                 case 176: /* GFX RB CP_INT */
5781                 case 177: /* GFX IB CP_INT */
5782                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5783                         break;
5784                 case 181: /* CP EOP event */
5785                         DRM_DEBUG("IH: CP EOP\n");
5786                         /* XXX check the bitfield order! */
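                        /* the masks below decode RINGID[6:5] as ME_ID,
                         * RINGID[4:3] as PIPE_ID and RINGID[2:0] as
                         * QUEUE_ID, matching the IV entry layout above */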
5787                         me_id = (ring_id & 0x60) >> 5;
5788                         pipe_id = (ring_id & 0x18) >> 3;
5789                         queue_id = (ring_id & 0x7) >> 0;
5790                         switch (me_id) {
5791                         case 0:
5792                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5793                                 break;
5794                         case 1:
5795                         case 2:
5796                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5797                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5798                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5799                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5800                                 break;
5801                         }
5802                         break;
5803                 case 184: /* CP Privileged reg access */
5804                         DRM_ERROR("Illegal register access in command stream\n");
5805                         /* XXX check the bitfield order! */
5806                         me_id = (ring_id & 0x60) >> 5;
5807                         pipe_id = (ring_id & 0x18) >> 3;
5808                         queue_id = (ring_id & 0x7) >> 0;
5809                         switch (me_id) {
5810                         case 0:
5811                                 /* This results in a full GPU reset, but all we need to do is soft
5812                                  * reset the CP for gfx
5813                                  */
5814                                 queue_reset = true;
5815                                 break;
5816                         case 1:
5817                                 /* XXX compute */
5818                                 queue_reset = true;
5819                                 break;
5820                         case 2:
5821                                 /* XXX compute */
5822                                 queue_reset = true;
5823                                 break;
5824                         }
5825                         break;
5826                 case 185: /* CP Privileged inst */
5827                         DRM_ERROR("Illegal instruction in command stream\n");
5828                         /* XXX check the bitfield order! */
5829                         me_id = (ring_id & 0x60) >> 5;
5830                         pipe_id = (ring_id & 0x18) >> 3;
5831                         queue_id = (ring_id & 0x7) >> 0;
5832                         switch (me_id) {
5833                         case 0:
5834                                 /* This results in a full GPU reset, but all we need to do is soft
5835                                  * reset the CP for gfx
5836                                  */
5837                                 queue_reset = true;
5838                                 break;
5839                         case 1:
5840                                 /* XXX compute */
5841                                 queue_reset = true;
5842                                 break;
5843                         case 2:
5844                                 /* XXX compute */
5845                                 queue_reset = true;
5846                                 break;
5847                         }
5848                         break;
5849                 case 224: /* SDMA trap event */
5850                         /* XXX check the bitfield order! */
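                        /* for SDMA the masks below decode RINGID[1:0] as
                         * INSTANCE_ID and RINGID[3:2] as QUEUE_ID, per the
                         * IV entry layout above */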
5851                         me_id = (ring_id & 0x3) >> 0;
5852                         queue_id = (ring_id & 0xc) >> 2;
5853                         DRM_DEBUG("IH: SDMA trap\n");
5854                         switch (me_id) {
5855                         case 0:
5856                                 switch (queue_id) {
5857                                 case 0:
5858                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5859                                         break;
5860                                 case 1:
5861                                         /* XXX compute */
5862                                         break;
5863                                 case 2:
5864                                         /* XXX compute */
5865                                         break;
5866                                 }
5867                                 break;
5868                         case 1:
5869                                 switch (queue_id) {
5870                                 case 0:
5871                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5872                                         break;
5873                                 case 1:
5874                                         /* XXX compute */
5875                                         break;
5876                                 case 2:
5877                                         /* XXX compute */
5878                                         break;
5879                                 }
5880                                 break;
5881                         }
5882                         break;
5883                 case 241: /* SDMA Privileged inst */
5884                 case 247: /* SDMA Privileged inst */
5885                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
5886                         /* XXX check the bitfield order! */
5887                         me_id = (ring_id & 0x3) >> 0;
5888                         queue_id = (ring_id & 0xc) >> 2;
5889                         switch (me_id) {
5890                         case 0:
5891                                 switch (queue_id) {
5892                                 case 0:
5893                                         queue_reset = true;
5894                                         break;
5895                                 case 1:
5896                                         /* XXX compute */
5897                                         queue_reset = true;
5898                                         break;
5899                                 case 2:
5900                                         /* XXX compute */
5901                                         queue_reset = true;
5902                                         break;
5903                                 }
5904                                 break;
5905                         case 1:
5906                                 switch (queue_id) {
5907                                 case 0:
5908                                         queue_reset = true;
5909                                         break;
5910                                 case 1:
5911                                         /* XXX compute */
5912                                         queue_reset = true;
5913                                         break;
5914                                 case 2:
5915                                         /* XXX compute */
5916                                         queue_reset = true;
5917                                         break;
5918                                 }
5919                                 break;
5920                         }
5921                         break;
5922                 case 233: /* GUI IDLE */
5923                         DRM_DEBUG("IH: GUI idle\n");
5924                         break;
5925                 default:
5926                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5927                         break;
5928                 }
5929
5930                 /* wptr/rptr are in bytes! */
5931                 rptr += 16;
5932                 rptr &= rdev->ih.ptr_mask;
5933         }
5934         if (queue_hotplug)
5935                 schedule_work(&rdev->hotplug_work);
5936         if (queue_reset)
5937                 schedule_work(&rdev->reset_work);
5938         rdev->ih.rptr = rptr;
5939         WREG32(IH_RB_RPTR, rdev->ih.rptr);
5940         atomic_set(&rdev->ih.lock, 0);
5941
5942         /* make sure wptr hasn't changed while processing */
5943         wptr = cik_get_ih_wptr(rdev);
5944         if (wptr != rptr)
5945                 goto restart_ih;
5946
5947         return IRQ_HANDLED;
5948 }
5949
5950 /*
5951  * startup/shutdown callbacks
5952  */
5953 /**
5954  * cik_startup - program the asic to a functional state
5955  *
5956  * @rdev: radeon_device pointer
5957  *
5958  * Programs the asic to a functional state (CIK).
5959  * Called by cik_init() and cik_resume().
5960  * Returns 0 for success, error for failure.
5961  */
5962 static int cik_startup(struct radeon_device *rdev)
5963 {
5964         struct radeon_ring *ring;
5965         int r;
5966
5967         cik_mc_program(rdev);
5968
5969         if (rdev->flags & RADEON_IS_IGP) {
5970                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5971                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5972                         r = cik_init_microcode(rdev);
5973                         if (r) {
5974                                 DRM_ERROR("Failed to load firmware!\n");
5975                                 return r;
5976                         }
5977                 }
5978         } else {
5979                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5980                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5981                     !rdev->mc_fw) {
5982                         r = cik_init_microcode(rdev);
5983                         if (r) {
5984                                 DRM_ERROR("Failed to load firmware!\n");
5985                                 return r;
5986                         }
5987                 }
5988
5989                 r = ci_mc_load_microcode(rdev);
5990                 if (r) {
5991                         DRM_ERROR("Failed to load MC firmware!\n");
5992                         return r;
5993                 }
5994         }
5995
5996         r = r600_vram_scratch_init(rdev);
5997         if (r)
5998                 return r;
5999
6000         r = cik_pcie_gart_enable(rdev);
6001         if (r)
6002                 return r;
6003         cik_gpu_init(rdev);
6004
6005         /* allocate rlc buffers */
6006         r = si_rlc_init(rdev);
6007         if (r) {
6008                 DRM_ERROR("Failed to init rlc BOs!\n");
6009                 return r;
6010         }
6011
6012         /* allocate wb buffer */
6013         r = radeon_wb_init(rdev);
6014         if (r)
6015                 return r;
6016
6017         /* allocate mec buffers */
6018         r = cik_mec_init(rdev);
6019         if (r) {
6020                 DRM_ERROR("Failed to init MEC BOs!\n");
6021                 return r;
6022         }
6023
6024         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6025         if (r) {
6026                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6027                 return r;
6028         }
6029
6030         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6031         if (r) {
6032                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6033                 return r;
6034         }
6035
6036         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6037         if (r) {
6038                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6039                 return r;
6040         }
6041
6042         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6043         if (r) {
6044                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6045                 return r;
6046         }
6047
6048         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6049         if (r) {
6050                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6051                 return r;
6052         }
6053
6054         r = cik_uvd_resume(rdev);
6055         if (!r) {
6056                 r = radeon_fence_driver_start_ring(rdev,
6057                                                    R600_RING_TYPE_UVD_INDEX);
6058                 if (r)
6059                         dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6060         }
6061         if (r)
6062                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6063
6064         /* Enable IRQ */
6065         if (!rdev->irq.installed) {
6066                 r = radeon_irq_kms_init(rdev);
6067                 if (r)
6068                         return r;
6069         }
6070
6071         r = cik_irq_init(rdev);
6072         if (r) {
6073                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6074                 radeon_irq_kms_fini(rdev);
6075                 return r;
6076         }
6077         cik_irq_set(rdev);
6078
6079         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6080         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6081                              CP_RB0_RPTR, CP_RB0_WPTR,
6082                              0, 0xfffff, RADEON_CP_PACKET2);
6083         if (r)
6084                 return r;
6085
6086         /* set up the compute queues */
6087         /* type-2 packets are deprecated on MEC, use type-3 instead */
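        /* the nop argument, PACKET3(PACKET3_NOP, 0x3FFF), gives the ring
         * code a type-3 packet to use when padding the ring */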
6088         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6089         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6090                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6091                              0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6092         if (r)
6093                 return r;
6094         ring->me = 1; /* first MEC */
6095         ring->pipe = 0; /* first pipe */
6096         ring->queue = 0; /* first queue */
6097         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6098
6099         /* type-2 packets are deprecated on MEC, use type-3 instead */
6100         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6101         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6102                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6103                              0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6104         if (r)
6105                 return r;
6106         /* dGPUs only have 1 MEC */
6107         ring->me = 1; /* first MEC */
6108         ring->pipe = 0; /* first pipe */
6109         ring->queue = 1; /* second queue */
6110         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6111
6112         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6113         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6114                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6115                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6116                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6117         if (r)
6118                 return r;
6119
6120         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6121         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6122                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6123                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6124                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6125         if (r)
6126                 return r;
6127
6128         r = cik_cp_resume(rdev);
6129         if (r)
6130                 return r;
6131
6132         r = cik_sdma_resume(rdev);
6133         if (r)
6134                 return r;
6135
6136         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6137         if (ring->ring_size) {
6138                 r = radeon_ring_init(rdev, ring, ring->ring_size,
6139                                      R600_WB_UVD_RPTR_OFFSET,
6140                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6141                                      0, 0xfffff, RADEON_CP_PACKET2);
6142                 if (!r)
6143                         r = r600_uvd_init(rdev);
6144                 if (r)
6145                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6146         }
6147
6148         r = radeon_ib_pool_init(rdev);
6149         if (r) {
6150                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6151                 return r;
6152         }
6153
6154         r = radeon_vm_manager_init(rdev);
6155         if (r) {
6156                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6157                 return r;
6158         }
6159
6160         return 0;
6161 }
6162
6163 /**
6164  * cik_resume - resume the asic to a functional state
6165  *
6166  * @rdev: radeon_device pointer
6167  *
6168  * Programs the asic to a functional state (CIK).
6169  * Called at resume.
6170  * Returns 0 for success, error for failure.
6171  */
6172 int cik_resume(struct radeon_device *rdev)
6173 {
6174         int r;
6175
6176         /* post card */
6177         atom_asic_init(rdev->mode_info.atom_context);
6178
6179         /* init golden registers */
6180         cik_init_golden_registers(rdev);
6181
6182         rdev->accel_working = true;
6183         r = cik_startup(rdev);
6184         if (r) {
6185                 DRM_ERROR("cik startup failed on resume\n");
6186                 rdev->accel_working = false;
6187                 return r;
6188         }
6189
6190         return r;
6191
6192 }
6193
6194 /**
6195  * cik_suspend - suspend the asic
6196  *
6197  * @rdev: radeon_device pointer
6198  *
6199  * Bring the chip into a state suitable for suspend (CIK).
6200  * Called at suspend.
6201  * Returns 0 for success.
6202  */
6203 int cik_suspend(struct radeon_device *rdev)
6204 {
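        /* Descriptive note: the engines (CP, SDMA, UVD) are halted before
         * writeback and the GART are torn down, since a running engine
         * could otherwise still post fences or read through GTT mappings.
         */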
6205         radeon_vm_manager_fini(rdev);
6206         cik_cp_enable(rdev, false);
6207         cik_sdma_enable(rdev, false);
6208         r600_uvd_stop(rdev);
6209         radeon_uvd_suspend(rdev);
6210         cik_irq_suspend(rdev);
6211         radeon_wb_disable(rdev);
6212         cik_pcie_gart_disable(rdev);
6213         return 0;
6214 }
6215
6216 /* The plan is to move initialization into this function and use
6217  * helper functions so that radeon_device_init does little more
6218  * than call asic specific functions.  This should also allow
6219  * us to remove a bunch of callback functions
6220  * like vram_info.
6221  */
6222 /**
6223  * cik_init - asic specific driver and hw init
6224  *
6225  * @rdev: radeon_device pointer
6226  *
6227  * Setup asic specific driver variables and program the hw
6228  * to a functional state (CIK).
6229  * Called at driver startup.
6230  * Returns 0 for success, errors for failure.
6231  */
6232 int cik_init(struct radeon_device *rdev)
6233 {
6234         struct radeon_ring *ring;
6235         int r;
6236
6237         /* Read BIOS */
6238         if (!radeon_get_bios(rdev)) {
6239                 if (ASIC_IS_AVIVO(rdev))
6240                         return -EINVAL;
6241         }
6242         /* Must be an ATOMBIOS */
6243         if (!rdev->is_atom_bios) {
6244                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6245                 return -EINVAL;
6246         }
6247         r = radeon_atombios_init(rdev);
6248         if (r)
6249                 return r;
6250
6251         /* Post card if necessary */
6252         if (!radeon_card_posted(rdev)) {
6253                 if (!rdev->bios) {
6254                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6255                         return -EINVAL;
6256                 }
6257                 DRM_INFO("GPU not posted. posting now...\n");
6258                 atom_asic_init(rdev->mode_info.atom_context);
6259         }
6260         /* init golden registers */
6261         cik_init_golden_registers(rdev);
6262         /* Initialize scratch registers */
6263         cik_scratch_init(rdev);
6264         /* Initialize surface registers */
6265         radeon_surface_init(rdev);
6266         /* Initialize clocks */
6267         radeon_get_clock_info(rdev->ddev);
6268
6269         /* Fence driver */
6270         r = radeon_fence_driver_init(rdev);
6271         if (r)
6272                 return r;
6273
6274         /* initialize memory controller */
6275         r = cik_mc_init(rdev);
6276         if (r)
6277                 return r;
6278         /* Memory manager */
6279         r = radeon_bo_init(rdev);
6280         if (r)
6281                 return r;
6282
6283         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6284         ring->ring_obj = NULL;
6285         r600_ring_init(rdev, ring, 1024 * 1024);
6286
6287         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6288         ring->ring_obj = NULL;
6289         r600_ring_init(rdev, ring, 1024 * 1024);
6290         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6291         if (r)
6292                 return r;
6293
6294         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6295         ring->ring_obj = NULL;
6296         r600_ring_init(rdev, ring, 1024 * 1024);
6297         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6298         if (r)
6299                 return r;
6300
6301         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6302         ring->ring_obj = NULL;
6303         r600_ring_init(rdev, ring, 256 * 1024);
6304
6305         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6306         ring->ring_obj = NULL;
6307         r600_ring_init(rdev, ring, 256 * 1024);
6308
6309         r = radeon_uvd_init(rdev);
6310         if (!r) {
6311                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6312                 ring->ring_obj = NULL;
6313                 r600_ring_init(rdev, ring, 4096);
6314         }
6315
6316         rdev->ih.ring_obj = NULL;
6317         r600_ih_ring_init(rdev, 64 * 1024);
6318
6319         r = r600_pcie_gart_init(rdev);
6320         if (r)
6321                 return r;
6322
6323         rdev->accel_working = true;
6324         r = cik_startup(rdev);
6325         if (r) {
6326                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6327                 cik_cp_fini(rdev);
6328                 cik_sdma_fini(rdev);
6329                 cik_irq_fini(rdev);
6330                 si_rlc_fini(rdev);
6331                 cik_mec_fini(rdev);
6332                 radeon_wb_fini(rdev);
6333                 radeon_ib_pool_fini(rdev);
6334                 radeon_vm_manager_fini(rdev);
6335                 radeon_irq_kms_fini(rdev);
6336                 cik_pcie_gart_fini(rdev);
6337                 rdev->accel_working = false;
6338         }
6339
6340         /* Don't start up if the MC ucode is missing.
6341          * The default clocks and voltages before the MC ucode
6342          * is loaded are not sufficient for advanced operations.
6343          */
6344         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6345                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6346                 return -EINVAL;
6347         }
6348
6349         return 0;
6350 }
6351
6352 /**
6353  * cik_fini - asic specific driver and hw fini
6354  *
6355  * @rdev: radeon_device pointer
6356  *
6357  * Tear down the asic specific driver variables and program the hw
6358  * to an idle state (CIK).
6359  * Called at driver unload.
6360  */
6361 void cik_fini(struct radeon_device *rdev)
6362 {
6363         cik_cp_fini(rdev);
6364         cik_sdma_fini(rdev);
6365         cik_irq_fini(rdev);
6366         si_rlc_fini(rdev);
6367         cik_mec_fini(rdev);
6368         radeon_wb_fini(rdev);
6369         radeon_vm_manager_fini(rdev);
6370         radeon_ib_pool_fini(rdev);
6371         radeon_irq_kms_fini(rdev);
6372         r600_uvd_stop(rdev);
6373         radeon_uvd_fini(rdev);
6374         cik_pcie_gart_fini(rdev);
6375         r600_vram_scratch_fini(rdev);
6376         radeon_gem_fini(rdev);
6377         radeon_fence_driver_fini(rdev);
6378         radeon_bo_fini(rdev);
6379         radeon_atombios_fini(rdev);
6380         kfree(rdev->bios);
6381         rdev->bios = NULL;
6382 }
6383
6384 /* display watermark setup */
6385 /**
6386  * dce8_line_buffer_adjust - Set up the line buffer
6387  *
6388  * @rdev: radeon_device pointer
6389  * @radeon_crtc: the selected display controller
6390  * @mode: the current display mode on the selected display
6391  * controller
6392  *
6393  * Set up the line buffer allocation for
6394  * the selected display controller (CIK).
6395  * Returns the line buffer size in pixels.
6396  */
6397 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6398                                    struct radeon_crtc *radeon_crtc,
6399                                    struct drm_display_mode *mode)
6400 {
6401         u32 tmp;
6402
6403         /*
6404          * Line Buffer Setup
6405          * There are 6 line buffers, one for each display controller.
6406          * There are 3 partitions per LB. Select the number of partitions
6407          * to enable based on the display width.  For display widths larger
6408          * than 4096, you need to use 2 display controllers and combine
6409          * them using the stereo blender.
6410          */
6411         if (radeon_crtc->base.enabled && mode) {
6412                 if (mode->crtc_hdisplay < 1920)
6413                         tmp = 1;
6414                 else if (mode->crtc_hdisplay < 2560)
6415                         tmp = 2;
6416                 else if (mode->crtc_hdisplay < 4096)
6417                         tmp = 0;
6418                 else {
6419                         DRM_DEBUG_KMS("Mode too big for LB!\n");
6420                         tmp = 0;
6421                 }
6422         } else
6423                 tmp = 1;
6424
6425         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6426                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6427
6428         if (radeon_crtc->base.enabled && mode) {
6429                 switch (tmp) {
6430                 case 0:
6431                 default:
6432                         return 4096 * 2;
6433                 case 1:
6434                         return 1920 * 2;
6435                 case 2:
6436                         return 2560 * 2;
6437                 }
6438         }
6439
6440         /* controller not enabled, so no lb used */
6441         return 0;
6442 }
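/* Worked example (illustrative): a 1920-wide mode is not < 1920 but is
 * < 2560, so LB_MEMORY_CONFIG(2) is programmed and 2560 * 2 pixels of
 * line buffer are reported; a 3840-wide mode selects config 0 and gets
 * the full 4096 * 2 pixel allocation.
 */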
6443
6444 /**
6445  * cik_get_number_of_dram_channels - get the number of dram channels
6446  *
6447  * @rdev: radeon_device pointer
6448  *
6449  * Look up the number of video ram channels (CIK).
6450  * Used for display watermark bandwidth calculations
6451  * Returns the number of dram channels
6452  */
6453 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6454 {
6455         u32 tmp = RREG32(MC_SHARED_CHMAP);
6456
6457         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6458         case 0:
6459         default:
6460                 return 1;
6461         case 1:
6462                 return 2;
6463         case 2:
6464                 return 4;
6465         case 3:
6466                 return 8;
6467         case 4:
6468                 return 3;
6469         case 5:
6470                 return 6;
6471         case 6:
6472                 return 10;
6473         case 7:
6474                 return 12;
6475         case 8:
6476                 return 16;
6477         }
6478 }
6479
6480 struct dce8_wm_params {
6481         u32 dram_channels; /* number of dram channels */
6482         u32 yclk;          /* bandwidth per dram data pin in kHz */
6483         u32 sclk;          /* engine clock in kHz */
6484         u32 disp_clk;      /* display clock in kHz */
6485         u32 src_width;     /* viewport width */
6486         u32 active_time;   /* active display time in ns */
6487         u32 blank_time;    /* blank time in ns */
6488         bool interlaced;    /* mode is interlaced */
6489         fixed20_12 vsc;    /* vertical scale ratio */
6490         u32 num_heads;     /* number of active crtcs */
6491         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6492         u32 lb_size;       /* line buffer allocated to pipe */
6493         u32 vtaps;         /* vertical scaler taps */
6494 };
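/* Illustrative fill (hypothetical 1080p60 head; the clock values are
 * assumptions, not measured hardware numbers):
 *
 *	struct dce8_wm_params wm = {
 *		.dram_channels = 2,
 *		.yclk = 1000000,	// 1 GHz per pin, in kHz
 *		.sclk = 800000,		// 800 MHz engine clock, in kHz
 *		.disp_clk = 148500,	// pixel clock in kHz
 *		.src_width = 1920,
 *		.bytes_per_pixel = 4,
 *		.num_heads = 1,
 *	};
 *
 * dce8_program_watermarks() builds such a struct from the current mode
 * before calling the dce8_*_bandwidth() helpers below.
 */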
6495
6496 /**
6497  * dce8_dram_bandwidth - get the dram bandwidth
6498  *
6499  * @wm: watermark calculation data
6500  *
6501  * Calculate the raw dram bandwidth (CIK).
6502  * Used for display watermark bandwidth calculations
6503  * Returns the dram bandwidth in MBytes/s
6504  */
6505 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6506 {
6507         /* Calculate raw DRAM Bandwidth */
6508         fixed20_12 dram_efficiency; /* 0.7 */
6509         fixed20_12 yclk, dram_channels, bandwidth;
6510         fixed20_12 a;
6511
6512         a.full = dfixed_const(1000);
6513         yclk.full = dfixed_const(wm->yclk);
6514         yclk.full = dfixed_div(yclk, a);
6515         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6516         a.full = dfixed_const(10);
6517         dram_efficiency.full = dfixed_const(7);
6518         dram_efficiency.full = dfixed_div(dram_efficiency, a);
6519         bandwidth.full = dfixed_mul(dram_channels, yclk);
6520         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6521
6522         return dfixed_trunc(bandwidth);
6523 }
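/* Worked example (illustrative, assumed numbers): with yclk = 1000000
 * (1 GHz per pin, in kHz) and 2 DRAM channels, this computes
 * 2 channels * 4 bytes * 1000 * 0.7 = 5600 MBytes/s.  Note how the 0.7
 * efficiency factor is built from integer-only fixed20_12 helpers:
 * dfixed_const(7) divided by dfixed_const(10).
 */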
6524
6525 /**
6526  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6527  *
6528  * @wm: watermark calculation data
6529  *
6530  * Calculate the dram bandwidth used for display (CIK).
6531  * Used for display watermark bandwidth calculations
6532  * Returns the dram bandwidth for display in MBytes/s
6533  */
6534 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6535 {
6536         /* Calculate DRAM Bandwidth and the part allocated to display. */
6537         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6538         fixed20_12 yclk, dram_channels, bandwidth;
6539         fixed20_12 a;
6540
6541         a.full = dfixed_const(1000);
6542         yclk.full = dfixed_const(wm->yclk);
6543         yclk.full = dfixed_div(yclk, a);
6544         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6545         a.full = dfixed_const(10);
6546         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6547         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6548         bandwidth.full = dfixed_mul(dram_channels, yclk);
6549         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6550
6551         return dfixed_trunc(bandwidth);
6552 }
6553
6554 /**
6555  * dce8_data_return_bandwidth - get the data return bandwidth
6556  *
6557  * @wm: watermark calculation data
6558  *
6559  * Calculate the data return bandwidth used for display (CIK).
6560  * Used for display watermark bandwidth calculations
6561  * Returns the data return bandwidth in MBytes/s
6562  */
6563 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6564 {
6565         /* Calculate the display Data return Bandwidth */
6566         fixed20_12 return_efficiency; /* 0.8 */
6567         fixed20_12 sclk, bandwidth;
6568         fixed20_12 a;
6569
6570         a.full = dfixed_const(1000);
6571         sclk.full = dfixed_const(wm->sclk);
6572         sclk.full = dfixed_div(sclk, a);
6573         a.full = dfixed_const(10);
6574         return_efficiency.full = dfixed_const(8);
6575         return_efficiency.full = dfixed_div(return_efficiency, a);
6576         a.full = dfixed_const(32);
6577         bandwidth.full = dfixed_mul(a, sclk);
6578         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6579
6580         return dfixed_trunc(bandwidth);
6581 }
6582
6583 /**
6584  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6585  *
6586  * @wm: watermark calculation data
6587  *
6588  * Calculate the dmif bandwidth used for display (CIK).
6589  * Used for display watermark bandwidth calculations
6590  * Returns the dmif bandwidth in MBytes/s
6591  */
6592 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6593 {
6594         /* Calculate the DMIF Request Bandwidth */
6595         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6596         fixed20_12 disp_clk, bandwidth;
6597         fixed20_12 a, b;
6598
6599         a.full = dfixed_const(1000);
6600         disp_clk.full = dfixed_const(wm->disp_clk);
6601         disp_clk.full = dfixed_div(disp_clk, a);
6602         a.full = dfixed_const(32);
6603         b.full = dfixed_mul(a, disp_clk);
6604
6605         a.full = dfixed_const(10);
6606         disp_clk_request_efficiency.full = dfixed_const(8);
6607         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6608
6609         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6610
6611         return dfixed_trunc(bandwidth);
6612 }
6613
6614 /**
6615  * dce8_available_bandwidth - get the min available bandwidth
6616  *
6617  * @wm: watermark calculation data
6618  *
6619  * Calculate the min available bandwidth used for display (CIK).
6620  * Used for display watermark bandwidth calculations
6621  * Returns the min available bandwidth in MBytes/s
6622  */
6623 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6624 {
6625         /* Calculate the available bandwidth. The display can use this temporarily but not on average. */
6626         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6627         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6628         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6629
6630         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6631 }
6632
6633 /**
6634  * dce8_average_bandwidth - get the average available bandwidth
6635  *
6636  * @wm: watermark calculation data
6637  *
6638  * Calculate the average available bandwidth used for display (CIK).
6639  * Used for display watermark bandwidth calculations
6640  * Returns the average available bandwidth in MBytes/s
6641  */
6642 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6643 {
6644         /* Calculate the display mode Average Bandwidth
6645          * DisplayMode should contain the source and destination dimensions,
6646          * timing, etc.
6647          */
6648         fixed20_12 bpp;
6649         fixed20_12 line_time;
6650         fixed20_12 src_width;
6651         fixed20_12 bandwidth;
6652         fixed20_12 a;
6653
6654         a.full = dfixed_const(1000);
6655         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6656         line_time.full = dfixed_div(line_time, a);
6657         bpp.full = dfixed_const(wm->bytes_per_pixel);
6658         src_width.full = dfixed_const(wm->src_width);
6659         bandwidth.full = dfixed_mul(src_width, bpp);
6660         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6661         bandwidth.full = dfixed_div(bandwidth, line_time);
6662
6663         return dfixed_trunc(bandwidth);
6664 }
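/* Worked example (illustrative): for 1080p60 timing the integer pixel
 * period is 1000000 / 148500 = 6 ns, giving a 2200 * 6 = 13200 ns line
 * time; a 1920-wide, 4-byte-per-pixel surface with vsc = 1 then averages
 * 1920 * 4 / 13.2 us, i.e. roughly 581 MBytes/s.
 */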
6665
6666 /**
6667  * dce8_latency_watermark - get the latency watermark
6668  *
6669  * @wm: watermark calculation data
6670  *
6671  * Calculate the latency watermark (CIK).
6672  * Used for display watermark bandwidth calculations
6673  * Returns the latency watermark in ns
6674  */
6675 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6676 {
6677         /* First calculate the latency in ns */
6678         u32 mc_latency = 2000; /* 2000 ns. */
6679         u32 available_bandwidth = dce8_available_bandwidth(wm);
6680         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6681         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6682         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6683         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6684                 (wm->num_heads * cursor_line_pair_return_time);
6685         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6686         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6687         u32 tmp, dmif_size = 12288;
6688         fixed20_12 a, b, c;
6689
6690         if (wm->num_heads == 0)
6691                 return 0;
6692
6693         a.full = dfixed_const(2);
6694         b.full = dfixed_const(1);
6695         if ((wm->vsc.full > a.full) ||
6696             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6697             (wm->vtaps >= 5) ||
6698             ((wm->vsc.full >= a.full) && wm->interlaced))
6699                 max_src_lines_per_dst_line = 4;
6700         else
6701                 max_src_lines_per_dst_line = 2;
6702
6703         a.full = dfixed_const(available_bandwidth);
6704         b.full = dfixed_const(wm->num_heads);
6705         a.full = dfixed_div(a, b);
6706
6707         b.full = dfixed_const(mc_latency + 512);
6708         c.full = dfixed_const(wm->disp_clk);
6709         b.full = dfixed_div(b, c);
6710
6711         c.full = dfixed_const(dmif_size);
6712         b.full = dfixed_div(c, b);
6713
6714         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6715
6716         b.full = dfixed_const(1000);
6717         c.full = dfixed_const(wm->disp_clk);
6718         b.full = dfixed_div(c, b);
6719         c.full = dfixed_const(wm->bytes_per_pixel);
6720         b.full = dfixed_mul(b, c);
6721
6722         lb_fill_bw = min(tmp, dfixed_trunc(b));
6723
6724         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6725         b.full = dfixed_const(1000);
6726         c.full = dfixed_const(lb_fill_bw);
6727         b.full = dfixed_div(c, b);
6728         a.full = dfixed_div(a, b);
6729         line_fill_time = dfixed_trunc(a);
6730
6731         if (line_fill_time < wm->active_time)
6732                 return latency;
6733         else
6734                 return latency + (line_fill_time - wm->active_time);
6735
6736 }
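/* Summary of the calculation above (descriptive): the watermark is
 *
 *	mc_latency + dc_latency + other_heads_data_return_time
 *
 * i.e. the 2000 ns memory controller latency, the dc pipe latency
 * (40000000 / disp_clk, in ns) and the time other heads can occupy the
 * data return path, plus a penalty of (line_fill_time - active_time)
 * whenever the line buffer cannot refill within one active line.
 */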
6737
6738 /**
6739  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6740  * average and available dram bandwidth
6741  *
6742  * @wm: watermark calculation data
6743  *
6744  * Check if the display average bandwidth fits in the display
6745  * dram bandwidth (CIK).
6746  * Used for display watermark bandwidth calculations
6747  * Returns true if the display fits, false if not.
6748  */
6749 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6750 {
6751         if (dce8_average_bandwidth(wm) <=
6752             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6753                 return true;
6754         else
6755                 return false;
6756 }
6757
6758 /**
6759  * dce8_average_bandwidth_vs_available_bandwidth - check
6760  * average and available bandwidth
6761  *
6762  * @wm: watermark calculation data
6763  *
6764  * Check if the display average bandwidth fits in the display
6765  * available bandwidth (CIK).
6766  * Used for display watermark bandwidth calculations
6767  * Returns true if the display fits, false if not.
6768  */
6769 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6770 {
6771         if (dce8_average_bandwidth(wm) <=
6772             (dce8_available_bandwidth(wm) / wm->num_heads))
6773                 return true;
6774         else
6775                 return false;
6776 }
6777
6778 /**
6779  * dce8_check_latency_hiding - check latency hiding
6780  *
6781  * @wm: watermark calculation data
6782  *
6783  * Check latency hiding (CIK).
6784  * Used for display watermark bandwidth calculations
6785  * Returns true if the display fits, false if not.
6786  */
6787 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6788 {
6789         u32 lb_partitions = wm->lb_size / wm->src_width;
6790         u32 line_time = wm->active_time + wm->blank_time;
6791         u32 latency_tolerant_lines;
6792         u32 latency_hiding;
6793         fixed20_12 a;
6794
6795         a.full = dfixed_const(1);
6796         if (wm->vsc.full > a.full)
6797                 latency_tolerant_lines = 1;
6798         else {
6799                 if (lb_partitions <= (wm->vtaps + 1))
6800                         latency_tolerant_lines = 1;
6801                 else
6802                         latency_tolerant_lines = 2;
6803         }
6804
6805         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6806
6807         if (dce8_latency_watermark(wm) <= latency_hiding)
6808                 return true;
6809         else
6810                 return false;
6811 }
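/* In other words (descriptive): the display "fits" if the line buffer
 * holds enough latency-tolerant lines (1 or 2, depending on scaling and
 * vtaps) that, together with the blank time, they cover the worst-case
 * latency from dce8_latency_watermark().
 */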
6812
6813 /**
6814  * dce8_program_watermarks - program display watermarks
6815  *
6816  * @rdev: radeon_device pointer
6817  * @radeon_crtc: the selected display controller
6818  * @lb_size: line buffer size
6819  * @num_heads: number of display controllers in use
6820  *
6821  * Calculate and program the display watermarks for the
6822  * selected display controller (CIK).
6823  */
6824 static void dce8_program_watermarks(struct radeon_device *rdev,
6825                                     struct radeon_crtc *radeon_crtc,
6826                                     u32 lb_size, u32 num_heads)
6827 {
6828         struct drm_display_mode *mode = &radeon_crtc->base.mode;
6829         struct dce8_wm_params wm;
6830         u32 pixel_period;
6831         u32 line_time = 0;
6832         u32 latency_watermark_a = 0, latency_watermark_b = 0;
6833         u32 tmp, wm_mask;
6834
6835         if (radeon_crtc->base.enabled && num_heads && mode) {
6836                 pixel_period = 1000000 / (u32)mode->clock;
6837                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6838
6839                 wm.yclk = rdev->pm.current_mclk * 10;
6840                 wm.sclk = rdev->pm.current_sclk * 10;
6841                 wm.disp_clk = mode->clock;
6842                 wm.src_width = mode->crtc_hdisplay;
6843                 wm.active_time = mode->crtc_hdisplay * pixel_period;
6844                 wm.blank_time = line_time - wm.active_time;
6845                 wm.interlaced = false;
6846                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6847                         wm.interlaced = true;
6848                 wm.vsc = radeon_crtc->vsc;
6849                 wm.vtaps = 1;
6850                 if (radeon_crtc->rmx_type != RMX_OFF)
6851                         wm.vtaps = 2;
6852                 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6853                 wm.lb_size = lb_size;
6854                 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6855                 wm.num_heads = num_heads;
6856
6857                 /* set for high clocks */
6858                 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6859                 /* set for low clocks */
6860                 /* XXX: wm.yclk and wm.sclk should be set to the low clocks here; they still hold the high-clock values */
6861                 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6862
6863                 /* possibly force display priority to high */
6864                 /* should really do this at mode validation time... */
6865                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6866                     !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6867                     !dce8_check_latency_hiding(&wm) ||
6868                     (rdev->disp_priority == 2)) {
6869                         DRM_DEBUG_KMS("force priority to high\n");
6870                 }
6871         }
6872
6873         /* select wm A */
6874         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6875         tmp = wm_mask;
6876         tmp &= ~LATENCY_WATERMARK_MASK(3);
6877         tmp |= LATENCY_WATERMARK_MASK(1);
6878         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6879         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6880                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6881                 LATENCY_HIGH_WATERMARK(line_time)));
6882         /* select wm B */
6883         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6884         tmp &= ~LATENCY_WATERMARK_MASK(3);
6885         tmp |= LATENCY_WATERMARK_MASK(2);
6886         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6887         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6888                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6889                 LATENCY_HIGH_WATERMARK(line_time)));
6890         /* restore original selection */
6891         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6892 }
6893
6894 /**
6895  * dce8_bandwidth_update - program display watermarks
6896  *
6897  * @rdev: radeon_device pointer
6898  *
6899  * Calculate and program the display watermarks and line
6900  * buffer allocation (CIK).
6901  */
6902 void dce8_bandwidth_update(struct radeon_device *rdev)
6903 {
6904         struct drm_display_mode *mode = NULL;
6905         u32 num_heads = 0, lb_size;
6906         int i;
6907
6908         radeon_update_display_priority(rdev);
6909
6910         for (i = 0; i < rdev->num_crtc; i++) {
6911                 if (rdev->mode_info.crtcs[i]->base.enabled)
6912                         num_heads++;
6913         }
6914         for (i = 0; i < rdev->num_crtc; i++) {
6915                 mode = &rdev->mode_info.crtcs[i]->base.mode;
6916                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6917                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6918         }
6919 }
6920
6921 /**
6922  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6923  *
6924  * @rdev: radeon_device pointer
6925  *
6926  * Fetches a GPU clock counter snapshot (CIK).
6927  * Returns the 64 bit clock counter snapshot.
6928  */
6929 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6930 {
6931         uint64_t clock;
6932
6933         mutex_lock(&rdev->gpu_clock_mutex);
6934         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6935         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6936                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6937         mutex_unlock(&rdev->gpu_clock_mutex);
6938         return clock;
6939 }
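/* Illustrative usage sketch (hypothetical caller; must not be in atomic
 * context since the function takes a mutex):
 *
 *	uint64_t t0, t1;
 *
 *	t0 = cik_get_gpu_clock_counter(rdev);
 *	... workload to be timed ...
 *	t1 = cik_get_gpu_clock_counter(rdev);
 *	// elapsed GPU clock ticks: t1 - t0
 */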
6940
6941 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6942                               u32 cntl_reg, u32 status_reg)
6943 {
6944         int r, i;
6945         struct atom_clock_dividers dividers;
6946         uint32_t tmp;
6947
6948         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6949                                            clock, false, &dividers);
6950         if (r)
6951                 return r;
6952
6953         tmp = RREG32_SMC(cntl_reg);
6954         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6955         tmp |= dividers.post_divider;
6956         WREG32_SMC(cntl_reg, tmp);
6957
6958         for (i = 0; i < 100; i++) {
6959                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6960                         break;
6961                 mdelay(10);
6962         }
6963         if (i == 100)
6964                 return -ETIMEDOUT;
6965
6966         return 0;
6967 }
6968
6969 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6970 {
6971         int r = 0;
6972
6973         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6974         if (r)
6975                 return r;
6976
6977         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6978         return r;
6979 }
6980
6981 int cik_uvd_resume(struct radeon_device *rdev)
6982 {
6983         uint64_t addr;
6984         uint32_t size;
6985         int r;
6986
6987         r = radeon_uvd_resume(rdev);
6988         if (r)
6989                 return r;
6990
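        /* VCPU address-space layout programmed below (descriptive; the
         * sizes are the driver's RADEON_UVD_* constants and everything
         * is expressed in 8-byte units, hence the ">> 3" shifts):
         *
         *	offset 0        : UVD firmware image
         *	+ fw size       : stack (RADEON_UVD_STACK_SIZE)
         *	+ stack size    : heap  (RADEON_UVD_HEAP_SIZE)
         */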
6991         /* program the VCPU memory controller bits 0-27 */
6992         addr = rdev->uvd.gpu_addr >> 3;
6993         size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6994         WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6995         WREG32(UVD_VCPU_CACHE_SIZE0, size);
6996
6997         addr += size;
6998         size = RADEON_UVD_STACK_SIZE >> 3;
6999         WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
7000         WREG32(UVD_VCPU_CACHE_SIZE1, size);
7001
7002         addr += size;
7003         size = RADEON_UVD_HEAP_SIZE >> 3;
7004         WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7005         WREG32(UVD_VCPU_CACHE_SIZE2, size);
7006
7007         /* bits 28-31 */
7008         addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7009         WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7010
7011         /* bits 32-39 */
7012         addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7013         WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7014
7015         return 0;
7016 }