]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
35f9cd83b8219de6eb50b4ac7c2ba4048aa2e488
[karo-tx-linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Ring topology for gfx v8: one GFX ring and eight compute (MEC) rings. */
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_NUM_COMPUTE_RINGS 8
56
/*
 * Per-ASIC golden GB_ADDR_CONFIG values; these match the mmGB_ADDR_CONFIG
 * entries programmed by the *_golden_common_all tables below.
 */
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
/*
 * Helpers that place a field value at its bit position inside the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 registers, using the *__SHIFT
 * definitions from gca/gfx_8_0_sh_mask.h.
 */
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
/*
 * Per-client override bits in RLC_CGTT_MGCG_OVERRIDE (one bit per
 * clock-gating client).  NOTE(review): these are local redefinitions —
 * confirm they do not conflict with definitions in the sh_mask headers.
 */
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES commands: set or clear the addressed BPM register */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM register addresses */
84 enum {
85         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
86         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
87         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
88         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
89         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
90         BPM_REG_FGCG_MAX
91 };
92
/* Length of the RLC direct-register list format — must match the RLC firmware. */
93 #define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images requested at module load, one set per supported VI ASIC
 * (CE, PFP, ME, MEC, optional MEC2, RLC).  Stoney and Topaz carry no MEC2
 * image in these lists.
 */
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
134
135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
142 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/*
 * GDS register offsets per VMID (0-15): {base, size, GWS, OA} for each,
 * indexed by VMID.
 */
149 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
150 {
151         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
152         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
153         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
154         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
155         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
156         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
157         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
158         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
159         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
160         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
161         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
162         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
163         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
164         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
165         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
166         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
167 };
168
/*
 * Tonga A11 golden register settings.  Entries are laid out as triplets of
 * {register offset, AND mask, OR value} — applied via
 * amdgpu_program_register_sequence() from gfx_v8_0_init_golden_registers().
 */
169 static const u32 golden_settings_tonga_a11[] =
170 {
171         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
172         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
173         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
174         mmGB_GPU_ID, 0x0000000f, 0x00000000,
175         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
176         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
177         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
178         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
179         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
180         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
181         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
182         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
183         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
184         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
185         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
186         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
187 };
188
/*
 * Tonga common golden settings ({reg, mask, value} triplets): raster config,
 * GB_ADDR_CONFIG (matches TONGA_GB_ADDR_CONFIG_GOLDEN) and SPI CU reserves.
 */
189 static const u32 tonga_golden_common_all[] =
190 {
191         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
192         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
193         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
194         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
195         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
196         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
197         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
198         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
199 };
200
/*
 * Tonga medium-grain / coarse-grain clock-gating init sequence
 * ({reg, mask, value} triplets).  GRBM_GFX_INDEX is written with
 * 0xe0000000 before the per-block CGTT writes — presumably the broadcast
 * (all SE/SH/instance) encoding; confirm against GRBM_GFX_INDEX field defs.
 */
201 static const u32 tonga_mgcg_cgcg_init[] =
202 {
203         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
204         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
205         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
210         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
212         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
213         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
214         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
216         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
219         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
220         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
221         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
222         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
223         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
224         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
225         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
226         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
227         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
228         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
229         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
230         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
231         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
232         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
233         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
234         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
237         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
242         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
245         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
246         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
247         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
248         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
249         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
250         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
251         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
252         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
253         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
254         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
255         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
256         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
257         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
258         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
259         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
260         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
261         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
262         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
263         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
264         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
265         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
266         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
267         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
268         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
269         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
270         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
271         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
272         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
273         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
274         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
275         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
276         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
277         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
278 };
279
/*
 * Polaris11 A11 golden register settings ({reg, mask, value} triplets).
 */
280 static const u32 golden_settings_polaris11_a11[] =
281 {
282         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
283         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
284         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
285         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
286         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
287         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
288         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
289         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
290         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
291         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
292         mmSQ_CONFIG, 0x07f80000, 0x01180000,
293         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
294         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
295         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
296         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
297         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
298         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
299 };
300
/*
 * Polaris11 common golden settings; GB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
301 static const u32 polaris11_golden_common_all[] =
302 {
303         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
304         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
305         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
306         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
307         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
308         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
309 };
310
/*
 * Polaris10 A11 golden register settings ({reg, mask, value} triplets).
 */
311 static const u32 golden_settings_polaris10_a11[] =
312 {
313         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
314         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
315         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
316         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
317         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
318         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
319         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
320         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
321         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
322         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
323         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
324         mmSQ_CONFIG, 0x07f80000, 0x07180000,
325         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
326         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
327         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
328         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
329         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
330 };
331
/*
 * Polaris10 common golden settings; GB_ADDR_CONFIG matches Tonga's
 * golden value (0x22011003).
 */
332 static const u32 polaris10_golden_common_all[] =
333 {
334         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
335         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
336         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
337         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
338         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
339         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
340         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
341         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
342 };
343
/*
 * Fiji common golden settings ({reg, mask, value} triplets).
 */
344 static const u32 fiji_golden_common_all[] =
345 {
346         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
347         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
348         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
349         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
350         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
351         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
352         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
353         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
354         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
355         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
356 };
357
/*
 * Fiji A10 golden register settings ({reg, mask, value} triplets).
 */
358 static const u32 golden_settings_fiji_a10[] =
359 {
360         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
361         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
362         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
363         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
364         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
365         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
366         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
367         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
368         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
369         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
370         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
371 };
372
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({reg, mask, value} triplets).
 * Unlike Tonga/Iceland/Carrizo, no per-CU CGTS programming is done here.
 */
373 static const u32 fiji_mgcg_cgcg_init[] =
374 {
375         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
376         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
377         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
382         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
384         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
385         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
386         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
387         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
388         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
390         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
391         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
392         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
393         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
394         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
395         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
396         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
397         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
398         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
399         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
400         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
401         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
402         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
403         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
404         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
405         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
407         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
408         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
409         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
410 };
411
/*
 * Iceland (Topaz) A11 golden register settings ({reg, mask, value} triplets).
 */
412 static const u32 golden_settings_iceland_a11[] =
413 {
414         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
415         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
416         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
417         mmGB_GPU_ID, 0x0000000f, 0x00000000,
418         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
419         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
420         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
421         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
428         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
429         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
430 };
431
/*
 * Iceland common golden settings; GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
432 static const u32 iceland_golden_common_all[] =
433 {
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
436         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
437         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
438         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
439         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
440         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
441         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
442 };
443
/*
 * Iceland MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triplets), applied by gfx_v8_0_init_golden_registers() via
 * amdgpu_program_register_sequence().  Per-CU CGTS entries cover CU0-CU5
 * only (fewer CUs than Tonga/Carrizo).
 */
444 static const u32 iceland_mgcg_cgcg_init[] =
445 {
446         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
447         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
448         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
451         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
452         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
453         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
457         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
460         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
463         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
464         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
465         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
467         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
468         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
469         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
470         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
471         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
472         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
473         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
475         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
476         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
477         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
478         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
479         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
480         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
481         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
482         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
483         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
484         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
485         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
486         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
487         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
488         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
489         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
490         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
491         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
492         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
493         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
494         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
495         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
496         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
497         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
498         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
499         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
500         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
501         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
502         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
503         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
504         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
505         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
506         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
507         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
508         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
509         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
510 };
511
/*
 * Carrizo A11 golden register settings ({reg, mask, value} triplets).
 */
512 static const u32 cz_golden_settings_a11[] =
513 {
514         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
515         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
516         mmGB_GPU_ID, 0x0000000f, 0x00000000,
517         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
518         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
519         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
520         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
521         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
522         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
523         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
524         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
525         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
526 };
527
/*
 * Carrizo common golden settings; GB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
528 static const u32 cz_golden_common_all[] =
529 {
530         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
531         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
532         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
533         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
534         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
535         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
536         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
537         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
538 };
539
/*
 * Carrizo MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triplets); per-CU CGTS entries cover CU0-CU7.
 */
540 static const u32 cz_mgcg_cgcg_init[] =
541 {
542         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
543         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
544         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
551         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
552         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
553         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
554         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
555         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
556         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
557         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
558         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
559         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
560         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
561         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
562         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
563         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
564         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
565         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
566         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
567         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
568         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
569         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
570         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
571         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
572         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
573         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
574         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
575         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
576         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
577         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
578         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
579         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
580         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
581         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
582         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
583         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
584         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
585         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
586         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
587         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
588         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
589         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
590         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
591         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
592         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
593         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
594         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
595         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
596         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
597         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
598         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
599         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
600         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
601         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
602         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
603         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
604         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
605         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
606         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
607         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
608         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
609         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
610         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
611         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
612         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
613         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
614         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
615         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
616         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
617 };
618
/*
 * Stoney A11 golden register settings ({reg, mask, value} triplets).
 */
619 static const u32 stoney_golden_settings_a11[] =
620 {
621         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
622         mmGB_GPU_ID, 0x0000000f, 0x00000000,
623         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
624         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
625         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
626         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
627         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
628         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
629         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
630         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
631 };
632
/* Stoney common GFX config (raster config, addressing, SPI CU reservations),
 * applied after the golden settings in gfx_v8_0_init_golden_registers(). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney medium-grain / coarse-grain clock-gating init sequence, applied
 * first for CHIP_STONEY in gfx_v8_0_init_golden_registers(). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
660 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
661 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
662
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register settings
 * @adev: amdgpu device
 *
 * Programs the ASIC-specific register sequences (clock-gating init,
 * workaround/tuning settings, common GFX configuration) through
 * amdgpu_program_register_sequence().  The per-ASIC ordering of the
 * sequences is deliberate and must not be changed.  Unknown ASIC types
 * are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		/* Polaris10 only: ACLK setup goes through the SMC */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific I2C programming for three specific
		 * rev-0xc7 Polaris10 SKUs (AMD 0xb37, ASUS 0x4a8,
		 * HIS 0x9480) - presumably external regulator setup;
		 * confirm against board documentation. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
751
752 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
753 {
754         adev->gfx.scratch.num_reg = 7;
755         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
756         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
757 }
758
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of a GFX/compute ring
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a 3-dword
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to the same register,
 * then polls the register for up to adev->usec_timeout microseconds.
 *
 * Returns 0 on success, -EINVAL on timeout, or the error from scratch
 * allocation / ring allocation.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* known sentinel so a stale read can't look like success */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* CP writes 0xDEADBEEF into the scratch register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-poll until the CP has executed the packet or we time out */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
802
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring
 * @ring:    ring to test
 * @timeout: fence wait timeout in jiffies (passed to dma_fence_wait_timeout())
 *
 * Like gfx_v8_0_ring_test_ring() but the SET_UCONFIG_REG packet is
 * placed in an IB and submitted through the scheduler, then the
 * completion fence is awaited before the scratch register is checked.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, -EINVAL
 * if the scratch value is wrong, or a negative error from the setup
 * steps.  'r' is long because dma_fence_wait_timeout() returns long.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* known sentinel so a stale read can't look like success */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* 3-dword IB: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
858
859
860 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
861         release_firmware(adev->gfx.pfp_fw);
862         adev->gfx.pfp_fw = NULL;
863         release_firmware(adev->gfx.me_fw);
864         adev->gfx.me_fw = NULL;
865         release_firmware(adev->gfx.ce_fw);
866         adev->gfx.ce_fw = NULL;
867         release_firmware(adev->gfx.rlc_fw);
868         adev->gfx.rlc_fw = NULL;
869         release_firmware(adev->gfx.mec_fw);
870         adev->gfx.mec_fw = NULL;
871         if ((adev->asic_type != CHIP_STONEY) &&
872             (adev->asic_type != CHIP_TOPAZ))
873                 release_firmware(adev->gfx.mec2_fw);
874         adev->gfx.mec2_fw = NULL;
875
876         kfree(adev->gfx.rlc.register_list_format);
877 }
878
879 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880 {
881         const char *chip_name;
882         char fw_name[30];
883         int err;
884         struct amdgpu_firmware_info *info = NULL;
885         const struct common_firmware_header *header = NULL;
886         const struct gfx_firmware_header_v1_0 *cp_hdr;
887         const struct rlc_firmware_header_v2_0 *rlc_hdr;
888         unsigned int *tmp = NULL, i;
889
890         DRM_DEBUG("\n");
891
892         switch (adev->asic_type) {
893         case CHIP_TOPAZ:
894                 chip_name = "topaz";
895                 break;
896         case CHIP_TONGA:
897                 chip_name = "tonga";
898                 break;
899         case CHIP_CARRIZO:
900                 chip_name = "carrizo";
901                 break;
902         case CHIP_FIJI:
903                 chip_name = "fiji";
904                 break;
905         case CHIP_POLARIS11:
906                 chip_name = "polaris11";
907                 break;
908         case CHIP_POLARIS10:
909                 chip_name = "polaris10";
910                 break;
911         case CHIP_POLARIS12:
912                 chip_name = "polaris12";
913                 break;
914         case CHIP_STONEY:
915                 chip_name = "stoney";
916                 break;
917         default:
918                 BUG();
919         }
920
921         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
922         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923         if (err)
924                 goto out;
925         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
926         if (err)
927                 goto out;
928         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
929         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
933         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
934         if (err)
935                 goto out;
936         err = amdgpu_ucode_validate(adev->gfx.me_fw);
937         if (err)
938                 goto out;
939         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
940         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
941
942         /* chain ib ucode isn't formal released, just disable it by far
943          * TODO: when ucod ready we should use ucode version to judge if
944          * chain-ib support or not.
945          */
946         adev->virt.chained_ib_support = false;
947
948         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
949
950         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
951         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
952         if (err)
953                 goto out;
954         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
955         if (err)
956                 goto out;
957         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
958         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
959         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
960
961         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
962         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
963         if (err)
964                 goto out;
965         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
966         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
967         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
968         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
969
970         adev->gfx.rlc.save_and_restore_offset =
971                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
972         adev->gfx.rlc.clear_state_descriptor_offset =
973                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
974         adev->gfx.rlc.avail_scratch_ram_locations =
975                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
976         adev->gfx.rlc.reg_restore_list_size =
977                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
978         adev->gfx.rlc.reg_list_format_start =
979                         le32_to_cpu(rlc_hdr->reg_list_format_start);
980         adev->gfx.rlc.reg_list_format_separate_start =
981                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
982         adev->gfx.rlc.starting_offsets_start =
983                         le32_to_cpu(rlc_hdr->starting_offsets_start);
984         adev->gfx.rlc.reg_list_format_size_bytes =
985                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
986         adev->gfx.rlc.reg_list_size_bytes =
987                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
988
989         adev->gfx.rlc.register_list_format =
990                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
991                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
992
993         if (!adev->gfx.rlc.register_list_format) {
994                 err = -ENOMEM;
995                 goto out;
996         }
997
998         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
999                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1000         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1001                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1002
1003         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1004
1005         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1006                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1007         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1008                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1009
1010         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1011         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1012         if (err)
1013                 goto out;
1014         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1015         if (err)
1016                 goto out;
1017         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1018         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1019         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1020
1021         if ((adev->asic_type != CHIP_STONEY) &&
1022             (adev->asic_type != CHIP_TOPAZ)) {
1023                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1024                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1025                 if (!err) {
1026                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1027                         if (err)
1028                                 goto out;
1029                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1030                                 adev->gfx.mec2_fw->data;
1031                         adev->gfx.mec2_fw_version =
1032                                 le32_to_cpu(cp_hdr->header.ucode_version);
1033                         adev->gfx.mec2_feature_version =
1034                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1035                 } else {
1036                         err = 0;
1037                         adev->gfx.mec2_fw = NULL;
1038                 }
1039         }
1040
1041         if (adev->firmware.smu_load) {
1042                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1043                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1044                 info->fw = adev->gfx.pfp_fw;
1045                 header = (const struct common_firmware_header *)info->fw->data;
1046                 adev->firmware.fw_size +=
1047                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1048
1049                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1050                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1051                 info->fw = adev->gfx.me_fw;
1052                 header = (const struct common_firmware_header *)info->fw->data;
1053                 adev->firmware.fw_size +=
1054                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1055
1056                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1057                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1058                 info->fw = adev->gfx.ce_fw;
1059                 header = (const struct common_firmware_header *)info->fw->data;
1060                 adev->firmware.fw_size +=
1061                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1062
1063                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1064                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1065                 info->fw = adev->gfx.rlc_fw;
1066                 header = (const struct common_firmware_header *)info->fw->data;
1067                 adev->firmware.fw_size +=
1068                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1069
1070                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1071                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1072                 info->fw = adev->gfx.mec_fw;
1073                 header = (const struct common_firmware_header *)info->fw->data;
1074                 adev->firmware.fw_size +=
1075                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1076
1077                 /* we need account JT in */
1078                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1079                 adev->firmware.fw_size +=
1080                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1081
1082                 if (amdgpu_sriov_vf(adev)) {
1083                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1084                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1085                         info->fw = adev->gfx.mec_fw;
1086                         adev->firmware.fw_size +=
1087                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1088                 }
1089
1090                 if (adev->gfx.mec2_fw) {
1091                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1092                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1093                         info->fw = adev->gfx.mec2_fw;
1094                         header = (const struct common_firmware_header *)info->fw->data;
1095                         adev->firmware.fw_size +=
1096                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1097                 }
1098
1099         }
1100
1101 out:
1102         if (err) {
1103                 dev_err(adev->dev,
1104                         "gfx8: Failed to load firmware \"%s\"\n",
1105                         fw_name);
1106                 release_firmware(adev->gfx.pfp_fw);
1107                 adev->gfx.pfp_fw = NULL;
1108                 release_firmware(adev->gfx.me_fw);
1109                 adev->gfx.me_fw = NULL;
1110                 release_firmware(adev->gfx.ce_fw);
1111                 adev->gfx.ce_fw = NULL;
1112                 release_firmware(adev->gfx.rlc_fw);
1113                 adev->gfx.rlc_fw = NULL;
1114                 release_firmware(adev->gfx.mec_fw);
1115                 adev->gfx.mec_fw = NULL;
1116                 release_firmware(adev->gfx.mec2_fw);
1117                 adev->gfx.mec2_fw = NULL;
1118         }
1119         return err;
1120 }
1121
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state buffer (CSB)
 * @adev:   amdgpu device
 * @buffer: destination (mapped clear-state BO); dwords are stored
 *          little-endian
 *
 * Emits, in order: PREAMBLE begin-clear-state, CONTEXT_CONTROL, every
 * SECT_CONTEXT extent from adev->gfx.rlc.cs_data, the two raster-config
 * registers, PREAMBLE end-clear-state and a final CLEAR_STATE packet.
 * Packet ordering is part of the CSB format and must not change.
 * Returns early (buffer left partially written) on a NULL cs_data/buffer
 * or if a non-context section is encountered.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1168
1169 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1170 {
1171         const __le32 *fw_data;
1172         volatile u32 *dst_ptr;
1173         int me, i, max_me = 4;
1174         u32 bo_offset = 0;
1175         u32 table_offset, table_size;
1176
1177         if (adev->asic_type == CHIP_CARRIZO)
1178                 max_me = 5;
1179
1180         /* write the cp table buffer */
1181         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1182         for (me = 0; me < max_me; me++) {
1183                 if (me == 0) {
1184                         const struct gfx_firmware_header_v1_0 *hdr =
1185                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1186                         fw_data = (const __le32 *)
1187                                 (adev->gfx.ce_fw->data +
1188                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1189                         table_offset = le32_to_cpu(hdr->jt_offset);
1190                         table_size = le32_to_cpu(hdr->jt_size);
1191                 } else if (me == 1) {
1192                         const struct gfx_firmware_header_v1_0 *hdr =
1193                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1194                         fw_data = (const __le32 *)
1195                                 (adev->gfx.pfp_fw->data +
1196                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1197                         table_offset = le32_to_cpu(hdr->jt_offset);
1198                         table_size = le32_to_cpu(hdr->jt_size);
1199                 } else if (me == 2) {
1200                         const struct gfx_firmware_header_v1_0 *hdr =
1201                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1202                         fw_data = (const __le32 *)
1203                                 (adev->gfx.me_fw->data +
1204                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1205                         table_offset = le32_to_cpu(hdr->jt_offset);
1206                         table_size = le32_to_cpu(hdr->jt_size);
1207                 } else if (me == 3) {
1208                         const struct gfx_firmware_header_v1_0 *hdr =
1209                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1210                         fw_data = (const __le32 *)
1211                                 (adev->gfx.mec_fw->data +
1212                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1213                         table_offset = le32_to_cpu(hdr->jt_offset);
1214                         table_size = le32_to_cpu(hdr->jt_size);
1215                 } else  if (me == 4) {
1216                         const struct gfx_firmware_header_v1_0 *hdr =
1217                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1218                         fw_data = (const __le32 *)
1219                                 (adev->gfx.mec2_fw->data +
1220                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1221                         table_offset = le32_to_cpu(hdr->jt_offset);
1222                         table_size = le32_to_cpu(hdr->jt_size);
1223                 }
1224
1225                 for (i = 0; i < table_size; i ++) {
1226                         dst_ptr[bo_offset + i] =
1227                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1228                 }
1229
1230                 bo_offset += table_size;
1231         }
1232 }
1233
1234 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1235 {
1236         int r;
1237
1238         /* clear state block */
1239         if (adev->gfx.rlc.clear_state_obj) {
1240                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1241                 if (unlikely(r != 0))
1242                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1243                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1244                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1245                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1246                 adev->gfx.rlc.clear_state_obj = NULL;
1247         }
1248
1249         /* jump table block */
1250         if (adev->gfx.rlc.cp_table_obj) {
1251                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1252                 if (unlikely(r != 0))
1253                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1254                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1255                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1256                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1257                 adev->gfx.rlc.cp_table_obj = NULL;
1258         }
1259 }
1260
1261 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1262 {
1263         volatile u32 *dst_ptr;
1264         u32 dws;
1265         const struct cs_section_def *cs_data;
1266         int r;
1267
1268         adev->gfx.rlc.cs_data = vi_cs_data;
1269
1270         cs_data = adev->gfx.rlc.cs_data;
1271
1272         if (cs_data) {
1273                 /* clear state block */
1274                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1275
1276                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1277                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1278                                              AMDGPU_GEM_DOMAIN_VRAM,
1279                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1280                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1281                                              NULL, NULL,
1282                                              &adev->gfx.rlc.clear_state_obj);
1283                         if (r) {
1284                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1285                                 gfx_v8_0_rlc_fini(adev);
1286                                 return r;
1287                         }
1288                 }
1289                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1290                 if (unlikely(r != 0)) {
1291                         gfx_v8_0_rlc_fini(adev);
1292                         return r;
1293                 }
1294                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1295                                   &adev->gfx.rlc.clear_state_gpu_addr);
1296                 if (r) {
1297                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1298                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1299                         gfx_v8_0_rlc_fini(adev);
1300                         return r;
1301                 }
1302
1303                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1304                 if (r) {
1305                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1306                         gfx_v8_0_rlc_fini(adev);
1307                         return r;
1308                 }
1309                 /* set up the cs buffer */
1310                 dst_ptr = adev->gfx.rlc.cs_ptr;
1311                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1312                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1313                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1314         }
1315
1316         if ((adev->asic_type == CHIP_CARRIZO) ||
1317             (adev->asic_type == CHIP_STONEY)) {
1318                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1319                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1320                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1321                                              AMDGPU_GEM_DOMAIN_VRAM,
1322                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1323                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1324                                              NULL, NULL,
1325                                              &adev->gfx.rlc.cp_table_obj);
1326                         if (r) {
1327                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1328                                 return r;
1329                         }
1330                 }
1331
1332                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1333                 if (unlikely(r != 0)) {
1334                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1335                         return r;
1336                 }
1337                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1338                                   &adev->gfx.rlc.cp_table_gpu_addr);
1339                 if (r) {
1340                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1341                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1342                         return r;
1343                 }
1344                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1345                 if (r) {
1346                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1347                         return r;
1348                 }
1349
1350                 cz_init_cp_jump_table(adev);
1351
1352                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1353                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1354         }
1355
1356         return 0;
1357 }
1358
/*
 * gfx_v8_0_mec_fini - unpin and free the MEC HPD EOP buffer object
 *
 * No-op if the BO was never created, so it is safe to call from
 * gfx_v8_0_mec_init() error paths.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		/* unpin needs the reservation held; warn but keep going on
		 * failure so we still drop our reference below */
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}
1373
1374 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1375                                   struct amdgpu_ring *ring,
1376                                   struct amdgpu_irq_src *irq)
1377 {
1378         int r = 0;
1379
1380         if (amdgpu_sriov_vf(adev)) {
1381                 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1382                 if (r)
1383                         return r;
1384         }
1385
1386         ring->adev = NULL;
1387         ring->ring_obj = NULL;
1388         ring->use_doorbell = true;
1389         ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1390         if (adev->gfx.mec2_fw) {
1391                 ring->me = 2;
1392                 ring->pipe = 0;
1393         } else {
1394                 ring->me = 1;
1395                 ring->pipe = 1;
1396         }
1397
1398         irq->data = ring;
1399         ring->queue = 0;
1400         sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1401         r = amdgpu_ring_init(adev, ring, 1024,
1402                              irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1403         if (r)
1404                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1405
1406         return r;
1407 }
1408
/*
 * gfx_v8_0_kiq_free_ring - tear down the KIQ ring
 *
 * Releases the SR-IOV write-back slot reserved by
 * gfx_v8_0_kiq_init_ring() before finalizing the ring itself.
 */
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	if (amdgpu_sriov_vf(ring->adev))
		amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);

	amdgpu_ring_fini(ring);
	irq->data = NULL;
}
1418
1419 #define MEC_HPD_SIZE 2048
1420
1421 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1422 {
1423         int r;
1424         u32 *hpd;
1425
1426         /*
1427          * we assign only 1 pipe because all other pipes will
1428          * be handled by KFD
1429          */
1430         adev->gfx.mec.num_mec = 1;
1431         adev->gfx.mec.num_pipe = 1;
1432         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1433
1434         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1435                 r = amdgpu_bo_create(adev,
1436                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1437                                      PAGE_SIZE, true,
1438                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1439                                      &adev->gfx.mec.hpd_eop_obj);
1440                 if (r) {
1441                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1442                         return r;
1443                 }
1444         }
1445
1446         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1447         if (unlikely(r != 0)) {
1448                 gfx_v8_0_mec_fini(adev);
1449                 return r;
1450         }
1451         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1452                           &adev->gfx.mec.hpd_eop_gpu_addr);
1453         if (r) {
1454                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1455                 gfx_v8_0_mec_fini(adev);
1456                 return r;
1457         }
1458         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1459         if (r) {
1460                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1461                 gfx_v8_0_mec_fini(adev);
1462                 return r;
1463         }
1464
1465         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1466
1467         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1468         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1469
1470         return 0;
1471 }
1472
/* gfx_v8_0_kiq_fini - free the KIQ EOP buffer object allocated by
 * gfx_v8_0_kiq_init() */
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
	kiq->eop_obj = NULL;
}
1480
/*
 * gfx_v8_0_kiq_init - allocate and zero the KIQ EOP buffer
 *
 * Creates a kernel BO in GTT for the KIQ's EOP area, clears it through
 * the CPU mapping provided by amdgpu_bo_create_kernel(), then drops the
 * mapping (only needed for the memset).
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	amdgpu_bo_kunmap(kiq->eop_obj);

	return 0;
}
1501
/* Raw dword encoding of the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to touch the VGPRs
 * (copied into the IB at vgpr_offset and dispatched). */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1538
/* Raw dword encoding of the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to touch the SGPRs
 * (shared by both the SGPR1 and SGPR2 dispatches). */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1563
/* (register, value) pairs written via PACKET3_SET_SH_REG to program the
 * compute state for the VGPR-init dispatch of the EDC workaround. */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1583
/* (register, value) pairs for the first SGPR-init dispatch; differs
 * from sgpr2_init_regs only in the STATIC_THREAD_MGMT_SE0 mask (0x0f). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1603
/* (register, value) pairs for the second SGPR-init dispatch; differs
 * from sgpr1_init_regs only in the STATIC_THREAD_MGMT_SE0 mask (0xf0). */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1623
/* EDC SEC/DED counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters. */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1652
1653 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1654 {
1655         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1656         struct amdgpu_ib ib;
1657         struct dma_fence *f = NULL;
1658         int r, i;
1659         u32 tmp;
1660         unsigned total_size, vgpr_offset, sgpr_offset;
1661         u64 gpu_addr;
1662
1663         /* only supported on CZ */
1664         if (adev->asic_type != CHIP_CARRIZO)
1665                 return 0;
1666
1667         /* bail if the compute ring is not ready */
1668         if (!ring->ready)
1669                 return 0;
1670
1671         tmp = RREG32(mmGB_EDC_MODE);
1672         WREG32(mmGB_EDC_MODE, 0);
1673
1674         total_size =
1675                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1676         total_size +=
1677                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1678         total_size +=
1679                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1680         total_size = ALIGN(total_size, 256);
1681         vgpr_offset = total_size;
1682         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1683         sgpr_offset = total_size;
1684         total_size += sizeof(sgpr_init_compute_shader);
1685
1686         /* allocate an indirect buffer to put the commands in */
1687         memset(&ib, 0, sizeof(ib));
1688         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1689         if (r) {
1690                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1691                 return r;
1692         }
1693
1694         /* load the compute shaders */
1695         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1696                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1697
1698         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1699                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1700
1701         /* init the ib length to 0 */
1702         ib.length_dw = 0;
1703
1704         /* VGPR */
1705         /* write the register state for the compute dispatch */
1706         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1707                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1708                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1709                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1710         }
1711         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1712         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1713         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1714         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1715         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1716         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1717
1718         /* write dispatch packet */
1719         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1720         ib.ptr[ib.length_dw++] = 8; /* x */
1721         ib.ptr[ib.length_dw++] = 1; /* y */
1722         ib.ptr[ib.length_dw++] = 1; /* z */
1723         ib.ptr[ib.length_dw++] =
1724                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1725
1726         /* write CS partial flush packet */
1727         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1728         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1729
1730         /* SGPR1 */
1731         /* write the register state for the compute dispatch */
1732         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1733                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1734                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1735                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1736         }
1737         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1738         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1739         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1740         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1741         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1742         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1743
1744         /* write dispatch packet */
1745         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1746         ib.ptr[ib.length_dw++] = 8; /* x */
1747         ib.ptr[ib.length_dw++] = 1; /* y */
1748         ib.ptr[ib.length_dw++] = 1; /* z */
1749         ib.ptr[ib.length_dw++] =
1750                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1751
1752         /* write CS partial flush packet */
1753         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1754         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1755
1756         /* SGPR2 */
1757         /* write the register state for the compute dispatch */
1758         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1759                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1760                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1761                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1762         }
1763         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1764         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1765         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1766         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1767         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1768         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1769
1770         /* write dispatch packet */
1771         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1772         ib.ptr[ib.length_dw++] = 8; /* x */
1773         ib.ptr[ib.length_dw++] = 1; /* y */
1774         ib.ptr[ib.length_dw++] = 1; /* z */
1775         ib.ptr[ib.length_dw++] =
1776                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1777
1778         /* write CS partial flush packet */
1779         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1780         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1781
1782         /* shedule the ib on the ring */
1783         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1784         if (r) {
1785                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1786                 goto fail;
1787         }
1788
1789         /* wait for the GPU to finish processing the IB */
1790         r = dma_fence_wait(f, false);
1791         if (r) {
1792                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1793                 goto fail;
1794         }
1795
1796         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1797         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1798         WREG32(mmGB_EDC_MODE, tmp);
1799
1800         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1801         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1802         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1803
1804
1805         /* read back registers to clear the counters */
1806         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1807                 RREG32(sec_ded_counter_registers[i]);
1808
1809 fail:
1810         amdgpu_ib_free(adev, &ib, NULL);
1811         dma_fence_put(f);
1812
1813         return r;
1814 }
1815
1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1817 {
1818         u32 gb_addr_config;
1819         u32 mc_shared_chmap, mc_arb_ramcfg;
1820         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1821         u32 tmp;
1822         int ret;
1823
1824         switch (adev->asic_type) {
1825         case CHIP_TOPAZ:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_cu_per_sh = 6;
1829                 adev->gfx.config.max_sh_per_se = 1;
1830                 adev->gfx.config.max_backends_per_se = 2;
1831                 adev->gfx.config.max_texture_channel_caches = 2;
1832                 adev->gfx.config.max_gprs = 256;
1833                 adev->gfx.config.max_gs_threads = 32;
1834                 adev->gfx.config.max_hw_contexts = 8;
1835
1836                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1841                 break;
1842         case CHIP_FIJI:
1843                 adev->gfx.config.max_shader_engines = 4;
1844                 adev->gfx.config.max_tile_pipes = 16;
1845                 adev->gfx.config.max_cu_per_sh = 16;
1846                 adev->gfx.config.max_sh_per_se = 1;
1847                 adev->gfx.config.max_backends_per_se = 4;
1848                 adev->gfx.config.max_texture_channel_caches = 16;
1849                 adev->gfx.config.max_gprs = 256;
1850                 adev->gfx.config.max_gs_threads = 32;
1851                 adev->gfx.config.max_hw_contexts = 8;
1852
1853                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1858                 break;
1859         case CHIP_POLARIS11:
1860         case CHIP_POLARIS12:
1861                 ret = amdgpu_atombios_get_gfx_info(adev);
1862                 if (ret)
1863                         return ret;
1864                 adev->gfx.config.max_gprs = 256;
1865                 adev->gfx.config.max_gs_threads = 32;
1866                 adev->gfx.config.max_hw_contexts = 8;
1867
1868                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1873                 break;
1874         case CHIP_POLARIS10:
1875                 ret = amdgpu_atombios_get_gfx_info(adev);
1876                 if (ret)
1877                         return ret;
1878                 adev->gfx.config.max_gprs = 256;
1879                 adev->gfx.config.max_gs_threads = 32;
1880                 adev->gfx.config.max_hw_contexts = 8;
1881
1882                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887                 break;
1888         case CHIP_TONGA:
1889                 adev->gfx.config.max_shader_engines = 4;
1890                 adev->gfx.config.max_tile_pipes = 8;
1891                 adev->gfx.config.max_cu_per_sh = 8;
1892                 adev->gfx.config.max_sh_per_se = 1;
1893                 adev->gfx.config.max_backends_per_se = 2;
1894                 adev->gfx.config.max_texture_channel_caches = 8;
1895                 adev->gfx.config.max_gprs = 256;
1896                 adev->gfx.config.max_gs_threads = 32;
1897                 adev->gfx.config.max_hw_contexts = 8;
1898
1899                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1904                 break;
1905         case CHIP_CARRIZO:
1906                 adev->gfx.config.max_shader_engines = 1;
1907                 adev->gfx.config.max_tile_pipes = 2;
1908                 adev->gfx.config.max_sh_per_se = 1;
1909                 adev->gfx.config.max_backends_per_se = 2;
1910
1911                 switch (adev->pdev->revision) {
1912                 case 0xc4:
1913                 case 0x84:
1914                 case 0xc8:
1915                 case 0xcc:
1916                 case 0xe1:
1917                 case 0xe3:
1918                         /* B10 */
1919                         adev->gfx.config.max_cu_per_sh = 8;
1920                         break;
1921                 case 0xc5:
1922                 case 0x81:
1923                 case 0x85:
1924                 case 0xc9:
1925                 case 0xcd:
1926                 case 0xe2:
1927                 case 0xe4:
1928                         /* B8 */
1929                         adev->gfx.config.max_cu_per_sh = 6;
1930                         break;
1931                 case 0xc6:
1932                 case 0xca:
1933                 case 0xce:
1934                 case 0x88:
1935                         /* B6 */
1936                         adev->gfx.config.max_cu_per_sh = 6;
1937                         break;
1938                 case 0xc7:
1939                 case 0x87:
1940                 case 0xcb:
1941                 case 0xe5:
1942                 case 0x89:
1943                 default:
1944                         /* B4 */
1945                         adev->gfx.config.max_cu_per_sh = 4;
1946                         break;
1947                 }
1948
1949                 adev->gfx.config.max_texture_channel_caches = 2;
1950                 adev->gfx.config.max_gprs = 256;
1951                 adev->gfx.config.max_gs_threads = 32;
1952                 adev->gfx.config.max_hw_contexts = 8;
1953
1954                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1955                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1956                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1957                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1958                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1959                 break;
1960         case CHIP_STONEY:
1961                 adev->gfx.config.max_shader_engines = 1;
1962                 adev->gfx.config.max_tile_pipes = 2;
1963                 adev->gfx.config.max_sh_per_se = 1;
1964                 adev->gfx.config.max_backends_per_se = 1;
1965
1966                 switch (adev->pdev->revision) {
1967                 case 0xc0:
1968                 case 0xc1:
1969                 case 0xc2:
1970                 case 0xc4:
1971                 case 0xc8:
1972                 case 0xc9:
1973                         adev->gfx.config.max_cu_per_sh = 3;
1974                         break;
1975                 case 0xd0:
1976                 case 0xd1:
1977                 case 0xd2:
1978                 default:
1979                         adev->gfx.config.max_cu_per_sh = 2;
1980                         break;
1981                 }
1982
1983                 adev->gfx.config.max_texture_channel_caches = 2;
1984                 adev->gfx.config.max_gprs = 256;
1985                 adev->gfx.config.max_gs_threads = 16;
1986                 adev->gfx.config.max_hw_contexts = 8;
1987
1988                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1992                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1993                 break;
1994         default:
1995                 adev->gfx.config.max_shader_engines = 2;
1996                 adev->gfx.config.max_tile_pipes = 4;
1997                 adev->gfx.config.max_cu_per_sh = 2;
1998                 adev->gfx.config.max_sh_per_se = 1;
1999                 adev->gfx.config.max_backends_per_se = 2;
2000                 adev->gfx.config.max_texture_channel_caches = 4;
2001                 adev->gfx.config.max_gprs = 256;
2002                 adev->gfx.config.max_gs_threads = 32;
2003                 adev->gfx.config.max_hw_contexts = 8;
2004
2005                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2006                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2007                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2008                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2009                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2010                 break;
2011         }
2012
2013         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2014         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2015         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2016
2017         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2018         adev->gfx.config.mem_max_burst_length_bytes = 256;
2019         if (adev->flags & AMD_IS_APU) {
2020                 /* Get memory bank mapping mode. */
2021                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2022                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2023                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2024
2025                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2026                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2027                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2028
2029                 /* Validate settings in case only one DIMM installed. */
2030                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2031                         dimm00_addr_map = 0;
2032                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2033                         dimm01_addr_map = 0;
2034                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2035                         dimm10_addr_map = 0;
2036                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2037                         dimm11_addr_map = 0;
2038
2039                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2040                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2041                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2042                         adev->gfx.config.mem_row_size_in_kb = 2;
2043                 else
2044                         adev->gfx.config.mem_row_size_in_kb = 1;
2045         } else {
2046                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2047                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2048                 if (adev->gfx.config.mem_row_size_in_kb > 4)
2049                         adev->gfx.config.mem_row_size_in_kb = 4;
2050         }
2051
2052         adev->gfx.config.shader_engine_tile_size = 32;
2053         adev->gfx.config.num_gpus = 1;
2054         adev->gfx.config.multi_gpu_tile_size = 64;
2055
2056         /* fix up row size */
2057         switch (adev->gfx.config.mem_row_size_in_kb) {
2058         case 1:
2059         default:
2060                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2061                 break;
2062         case 2:
2063                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2064                 break;
2065         case 4:
2066                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2067                 break;
2068         }
2069         adev->gfx.config.gb_addr_config = gb_addr_config;
2070
2071         return 0;
2072 }
2073
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * @handle: opaque IP-block handle; actually a struct amdgpu_device *.
 *
 * Registers the gfx interrupt sources, loads the gfx microcode,
 * allocates the RLC/MEC/KIQ buffer objects, creates the gfx and
 * compute rings, reserves the GDS/GWS/OA partitions for gfx use and
 * finally reads back the chip configuration.
 *
 * Returns 0 on success or a negative error code from the first step
 * that failed (no unwinding of earlier steps is done here).
 */
static int gfx_v8_0_sw_init(void *handle)
{
        int i, r;
        struct amdgpu_ring *ring;
        struct amdgpu_kiq *kiq;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* KIQ event (interrupt source id 178) */
        r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq);
        if (r)
                return r;

        /* EOP Event (interrupt source id 181) */
        r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
        if (r)
                return r;

        /* Privileged reg (interrupt source id 184) */
        r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
        if (r)
                return r;

        /* Privileged inst (interrupt source id 185) */
        r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
        if (r)
                return r;

        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

        gfx_v8_0_scratch_init(adev);

        /* Microcode must be available before the BO/ring setup below. */
        r = gfx_v8_0_init_microcode(adev);
        if (r) {
                DRM_ERROR("Failed to load gfx firmware!\n");
                return r;
        }

        r = gfx_v8_0_rlc_init(adev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        r = gfx_v8_0_mec_init(adev);
        if (r) {
                DRM_ERROR("Failed to init MEC BOs!\n");
                return r;
        }

        r = gfx_v8_0_kiq_init(adev);
        if (r) {
                DRM_ERROR("Failed to init KIQ BOs!\n");
                return r;
        }

        kiq = &adev->gfx.kiq;
        r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
        if (r)
                return r;

        /* set up the gfx ring */
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                ring = &adev->gfx.gfx_ring[i];
                ring->ring_obj = NULL;
                sprintf(ring->name, "gfx");
                /* no gfx doorbells on iceland */
                if (adev->asic_type != CHIP_TOPAZ) {
                        ring->use_doorbell = true;
                        ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
                }

                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                                     AMDGPU_CP_IRQ_GFX_EOP);
                if (r)
                        return r;
        }

        /* set up the compute queues */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                unsigned irq_type;

                /* max 32 queues per MEC */
                if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
                        DRM_ERROR("Too many (%d) compute rings!\n", i);
                        break;
                }
                ring = &adev->gfx.compute_ring[i];
                ring->ring_obj = NULL;
                ring->use_doorbell = true;
                ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
                ring->me = 1; /* first MEC */
                /* 8 queues per pipe: ring i maps onto pipe i/8, queue i%8 */
                ring->pipe = i / 8;
                ring->queue = i % 8;
                sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
                irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
                /* type-2 packets are deprecated on MEC, use type-3 instead */
                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                                     irq_type);
                if (r)
                        return r;
        }

        /* reserve GDS, GWS and OA resource for gfx */
        r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
                                    &adev->gds.gds_gfx_bo, NULL, NULL);
        if (r)
                return r;

        r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
                                    &adev->gds.gws_gfx_bo, NULL, NULL);
        if (r)
                return r;

        r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
                                    &adev->gds.oa_gfx_bo, NULL, NULL);
        if (r)
                return r;

        /* constant-engine RAM size: 0x8000 bytes */
        adev->gfx.ce_ram_size = 0x8000;

        r = gfx_v8_0_gpu_early_init(adev);
        if (r)
                return r;

        return 0;
}
2203
2204 static int gfx_v8_0_sw_fini(void *handle)
2205 {
2206         int i;
2207         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2208
2209         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2210         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2211         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2212
2213         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2214                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2215         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2216                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2217         gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2218
2219         gfx_v8_0_kiq_fini(adev);
2220         gfx_v8_0_mec_fini(adev);
2221         gfx_v8_0_rlc_fini(adev);
2222         gfx_v8_0_free_microcode(adev);
2223
2224         return 0;
2225 }
2226
2227 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2228 {
2229         uint32_t *modearray, *mod2array;
2230         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2231         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2232         u32 reg_offset;
2233
2234         modearray = adev->gfx.config.tile_mode_array;
2235         mod2array = adev->gfx.config.macrotile_mode_array;
2236
2237         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2238                 modearray[reg_offset] = 0;
2239
2240         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2241                 mod2array[reg_offset] = 0;
2242
2243         switch (adev->asic_type) {
2244         case CHIP_TOPAZ:
2245                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246                                 PIPE_CONFIG(ADDR_SURF_P2) |
2247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2249                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                 PIPE_CONFIG(ADDR_SURF_P2) |
2251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2253                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P2) |
2255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                 PIPE_CONFIG(ADDR_SURF_P2) |
2259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262                                 PIPE_CONFIG(ADDR_SURF_P2) |
2263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266                                 PIPE_CONFIG(ADDR_SURF_P2) |
2267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2270                                 PIPE_CONFIG(ADDR_SURF_P2) |
2271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2274                                 PIPE_CONFIG(ADDR_SURF_P2));
2275                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P2) |
2277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2278                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2279                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                  PIPE_CONFIG(ADDR_SURF_P2) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284                                  PIPE_CONFIG(ADDR_SURF_P2) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2287                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288                                  PIPE_CONFIG(ADDR_SURF_P2) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                  PIPE_CONFIG(ADDR_SURF_P2) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2296                                  PIPE_CONFIG(ADDR_SURF_P2) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300                                  PIPE_CONFIG(ADDR_SURF_P2) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2304                                  PIPE_CONFIG(ADDR_SURF_P2) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2308                                  PIPE_CONFIG(ADDR_SURF_P2) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2311                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2312                                  PIPE_CONFIG(ADDR_SURF_P2) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2315                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2316                                  PIPE_CONFIG(ADDR_SURF_P2) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2319                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2320                                  PIPE_CONFIG(ADDR_SURF_P2) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2323                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2324                                  PIPE_CONFIG(ADDR_SURF_P2) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2328                                  PIPE_CONFIG(ADDR_SURF_P2) |
2329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2332                                  PIPE_CONFIG(ADDR_SURF_P2) |
2333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336                                  PIPE_CONFIG(ADDR_SURF_P2) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                  PIPE_CONFIG(ADDR_SURF_P2) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                  PIPE_CONFIG(ADDR_SURF_P2) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2347
2348                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2351                                 NUM_BANKS(ADDR_SURF_8_BANK));
2352                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2355                                 NUM_BANKS(ADDR_SURF_8_BANK));
2356                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2359                                 NUM_BANKS(ADDR_SURF_8_BANK));
2360                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2363                                 NUM_BANKS(ADDR_SURF_8_BANK));
2364                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367                                 NUM_BANKS(ADDR_SURF_8_BANK));
2368                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371                                 NUM_BANKS(ADDR_SURF_8_BANK));
2372                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                                 NUM_BANKS(ADDR_SURF_8_BANK));
2376                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2379                                 NUM_BANKS(ADDR_SURF_16_BANK));
2380                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2383                                 NUM_BANKS(ADDR_SURF_16_BANK));
2384                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2385                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2387                                  NUM_BANKS(ADDR_SURF_16_BANK));
2388                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2389                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2390                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2391                                  NUM_BANKS(ADDR_SURF_16_BANK));
2392                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2395                                  NUM_BANKS(ADDR_SURF_16_BANK));
2396                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2399                                  NUM_BANKS(ADDR_SURF_16_BANK));
2400                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2403                                  NUM_BANKS(ADDR_SURF_8_BANK));
2404
2405                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2406                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2407                             reg_offset != 23)
2408                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2409
2410                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2411                         if (reg_offset != 7)
2412                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2413
2414                 break;
2415         case CHIP_FIJI:
2416                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2419                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2423                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2431                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2435                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2436                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2437                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2439                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2440                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2443                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2444                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2447                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2448                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2449                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2450                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2453                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2457                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2461                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2464                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2465                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2467                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2470                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2472                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2474                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2475                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2476                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2477                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2478                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2479                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2480                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2482                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2484                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2486                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2487                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2488                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2491                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2492                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2496                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2497                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2499                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2500                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2503                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2504                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2507                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2508                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2509                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2510                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2511                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2512                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2513                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2514                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2515                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2516                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2517                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2518                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2519                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2520                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2521                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2522                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2524                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2525                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2528                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2529                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2531                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2532                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2533                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2534                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2536                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2537                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2538
2539                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542                                 NUM_BANKS(ADDR_SURF_8_BANK));
2543                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2546                                 NUM_BANKS(ADDR_SURF_8_BANK));
2547                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550                                 NUM_BANKS(ADDR_SURF_8_BANK));
2551                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2553                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554                                 NUM_BANKS(ADDR_SURF_8_BANK));
2555                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558                                 NUM_BANKS(ADDR_SURF_8_BANK));
2559                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562                                 NUM_BANKS(ADDR_SURF_8_BANK));
2563                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566                                 NUM_BANKS(ADDR_SURF_8_BANK));
2567                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2569                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2570                                 NUM_BANKS(ADDR_SURF_8_BANK));
2571                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2573                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2574                                 NUM_BANKS(ADDR_SURF_8_BANK));
2575                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                  NUM_BANKS(ADDR_SURF_8_BANK));
2579                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2582                                  NUM_BANKS(ADDR_SURF_8_BANK));
2583                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586                                  NUM_BANKS(ADDR_SURF_8_BANK));
2587                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590                                  NUM_BANKS(ADDR_SURF_8_BANK));
2591                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2594                                  NUM_BANKS(ADDR_SURF_4_BANK));
2595
2596                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2597                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2598
2599                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2600                         if (reg_offset != 7)
2601                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2602
2603                 break;
2604         case CHIP_TONGA:
2605                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2612                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2620                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2624                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2625                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2626                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2628                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2629                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2632                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2633                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2636                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2637                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2638                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2639                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2646                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2650                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2651                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2654                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2656                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2657                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2661                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2664                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2665                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2666                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2669                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2670                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2671                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2672                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2674                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2675                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2676                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2680                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2681                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2685                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2686                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2688                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2692                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2693                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2696                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2698                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2699                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2700                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2701                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2702                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2703                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2704                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2705                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2706                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2707                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2708                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2709                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2710                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2711                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2713                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2714                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2717                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2718                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2721                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2722                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2726                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727
2728                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2731                                 NUM_BANKS(ADDR_SURF_16_BANK));
2732                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                                 NUM_BANKS(ADDR_SURF_16_BANK));
2736                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739                                 NUM_BANKS(ADDR_SURF_16_BANK));
2740                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743                                 NUM_BANKS(ADDR_SURF_16_BANK));
2744                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2746                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                                 NUM_BANKS(ADDR_SURF_16_BANK));
2748                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2751                                 NUM_BANKS(ADDR_SURF_16_BANK));
2752                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2755                                 NUM_BANKS(ADDR_SURF_16_BANK));
2756                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2758                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2759                                 NUM_BANKS(ADDR_SURF_16_BANK));
2760                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763                                 NUM_BANKS(ADDR_SURF_16_BANK));
2764                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2765                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2766                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2767                                  NUM_BANKS(ADDR_SURF_16_BANK));
2768                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2771                                  NUM_BANKS(ADDR_SURF_16_BANK));
2772                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2775                                  NUM_BANKS(ADDR_SURF_8_BANK));
2776                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2779                                  NUM_BANKS(ADDR_SURF_4_BANK));
2780                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2783                                  NUM_BANKS(ADDR_SURF_4_BANK));
2784
2785                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2786                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2787
2788                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2789                         if (reg_offset != 7)
2790                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2791
2792                 break;
2793         case CHIP_POLARIS11:
2794         case CHIP_POLARIS12:
2795                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2798                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2802                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2806                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2810                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2826                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2827                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2828                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2829                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2844                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2846                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2851                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2854                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2862                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2863                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2865                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2870                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2871                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2874                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2875                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2878                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2886                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2887                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2888                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2890                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2891                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2894                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2898                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2900                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2901                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917
2918                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921                                 NUM_BANKS(ADDR_SURF_16_BANK));
2922
2923                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926                                 NUM_BANKS(ADDR_SURF_16_BANK));
2927
2928                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2930                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2931                                 NUM_BANKS(ADDR_SURF_16_BANK));
2932
2933                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2935                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2936                                 NUM_BANKS(ADDR_SURF_16_BANK));
2937
2938                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2941                                 NUM_BANKS(ADDR_SURF_16_BANK));
2942
2943                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2945                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2946                                 NUM_BANKS(ADDR_SURF_16_BANK));
2947
2948                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2951                                 NUM_BANKS(ADDR_SURF_16_BANK));
2952
2953                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2954                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2955                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2956                                 NUM_BANKS(ADDR_SURF_16_BANK));
2957
2958                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2959                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2960                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961                                 NUM_BANKS(ADDR_SURF_16_BANK));
2962
2963                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966                                 NUM_BANKS(ADDR_SURF_16_BANK));
2967
2968                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                 NUM_BANKS(ADDR_SURF_16_BANK));
2972
2973                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2976                                 NUM_BANKS(ADDR_SURF_16_BANK));
2977
2978                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2980                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2981                                 NUM_BANKS(ADDR_SURF_8_BANK));
2982
2983                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2986                                 NUM_BANKS(ADDR_SURF_4_BANK));
2987
2988                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2989                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2990
2991                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2992                         if (reg_offset != 7)
2993                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2994
2995                 break;
2996         case CHIP_POLARIS10:
2997                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3024                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3025                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3028                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3029                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3030                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3031                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3035                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3038                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3042                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3043                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3046                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3047                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3048                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3051                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3053                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3055                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3056                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3057                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3058                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3059                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3061                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3062                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3063                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3064                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3065                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3066                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3067                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3068                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3069                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3070                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3072                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3073                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3074                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3076                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3077                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3080                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3081                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3084                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3085                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3086                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3087                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3088                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3090                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3091                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3092                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3093                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3094                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3095                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3096                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3097                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3098                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3099                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3100                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3102                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3103                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3106                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3110                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3114                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3116                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3118                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3119
3120                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3122                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3123                                 NUM_BANKS(ADDR_SURF_16_BANK));
3124
3125                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3126                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128                                 NUM_BANKS(ADDR_SURF_16_BANK));
3129
3130                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3131                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3132                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133                                 NUM_BANKS(ADDR_SURF_16_BANK));
3134
3135                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138                                 NUM_BANKS(ADDR_SURF_16_BANK));
3139
3140                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3143                                 NUM_BANKS(ADDR_SURF_16_BANK));
3144
3145                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3146                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3147                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3148                                 NUM_BANKS(ADDR_SURF_16_BANK));
3149
3150                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3153                                 NUM_BANKS(ADDR_SURF_16_BANK));
3154
3155                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158                                 NUM_BANKS(ADDR_SURF_16_BANK));
3159
3160                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163                                 NUM_BANKS(ADDR_SURF_16_BANK));
3164
3165                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3167                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3168                                 NUM_BANKS(ADDR_SURF_16_BANK));
3169
3170                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173                                 NUM_BANKS(ADDR_SURF_16_BANK));
3174
3175                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3178                                 NUM_BANKS(ADDR_SURF_8_BANK));
3179
3180                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3183                                 NUM_BANKS(ADDR_SURF_4_BANK));
3184
3185                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3188                                 NUM_BANKS(ADDR_SURF_4_BANK));
3189
3190                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3191                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3192
3193                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3194                         if (reg_offset != 7)
3195                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3196
3197                 break;
3198         case CHIP_STONEY:
3199                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                 PIPE_CONFIG(ADDR_SURF_P2) |
3201                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3202                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3203                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                 PIPE_CONFIG(ADDR_SURF_P2) |
3205                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3206                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3207                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208                                 PIPE_CONFIG(ADDR_SURF_P2) |
3209                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3211                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3212                                 PIPE_CONFIG(ADDR_SURF_P2) |
3213                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3215                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216                                 PIPE_CONFIG(ADDR_SURF_P2) |
3217                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3218                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3219                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3220                                 PIPE_CONFIG(ADDR_SURF_P2) |
3221                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3222                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3223                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224                                 PIPE_CONFIG(ADDR_SURF_P2) |
3225                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3228                                 PIPE_CONFIG(ADDR_SURF_P2));
3229                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3230                                 PIPE_CONFIG(ADDR_SURF_P2) |
3231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3232                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234                                  PIPE_CONFIG(ADDR_SURF_P2) |
3235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3237                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                  PIPE_CONFIG(ADDR_SURF_P2) |
3239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3241                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3242                                  PIPE_CONFIG(ADDR_SURF_P2) |
3243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246                                  PIPE_CONFIG(ADDR_SURF_P2) |
3247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3249                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3250                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254                                  PIPE_CONFIG(ADDR_SURF_P2) |
3255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3257                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3258                                  PIPE_CONFIG(ADDR_SURF_P2) |
3259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3261                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3262                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3265                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3266                                  PIPE_CONFIG(ADDR_SURF_P2) |
3267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3269                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3270                                  PIPE_CONFIG(ADDR_SURF_P2) |
3271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3273                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3274                                  PIPE_CONFIG(ADDR_SURF_P2) |
3275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3277                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3278                                  PIPE_CONFIG(ADDR_SURF_P2) |
3279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3281                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3282                                  PIPE_CONFIG(ADDR_SURF_P2) |
3283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3286                                  PIPE_CONFIG(ADDR_SURF_P2) |
3287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3290                                  PIPE_CONFIG(ADDR_SURF_P2) |
3291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294                                  PIPE_CONFIG(ADDR_SURF_P2) |
3295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3298                                  PIPE_CONFIG(ADDR_SURF_P2) |
3299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3301
3302                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3303                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305                                 NUM_BANKS(ADDR_SURF_8_BANK));
3306                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3307                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309                                 NUM_BANKS(ADDR_SURF_8_BANK));
3310                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313                                 NUM_BANKS(ADDR_SURF_8_BANK));
3314                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3317                                 NUM_BANKS(ADDR_SURF_8_BANK));
3318                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321                                 NUM_BANKS(ADDR_SURF_8_BANK));
3322                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325                                 NUM_BANKS(ADDR_SURF_8_BANK));
3326                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3328                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3329                                 NUM_BANKS(ADDR_SURF_8_BANK));
3330                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3331                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3332                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3333                                 NUM_BANKS(ADDR_SURF_16_BANK));
3334                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337                                 NUM_BANKS(ADDR_SURF_16_BANK));
3338                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3339                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3340                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3341                                  NUM_BANKS(ADDR_SURF_16_BANK));
3342                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3343                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3344                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3345                                  NUM_BANKS(ADDR_SURF_16_BANK));
3346                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3347                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3348                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349                                  NUM_BANKS(ADDR_SURF_16_BANK));
3350                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3351                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3352                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3353                                  NUM_BANKS(ADDR_SURF_16_BANK));
3354                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3356                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3357                                  NUM_BANKS(ADDR_SURF_8_BANK));
3358
3359                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3360                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3361                             reg_offset != 23)
3362                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3363
3364                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3365                         if (reg_offset != 7)
3366                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3367
3368                 break;
3369         default:
3370                 dev_warn(adev->dev,
3371                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3372                          adev->asic_type);
3373
3374         case CHIP_CARRIZO:
3375                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376                                 PIPE_CONFIG(ADDR_SURF_P2) |
3377                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3378                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3379                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380                                 PIPE_CONFIG(ADDR_SURF_P2) |
3381                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3382                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3383                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384                                 PIPE_CONFIG(ADDR_SURF_P2) |
3385                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3386                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3387                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3388                                 PIPE_CONFIG(ADDR_SURF_P2) |
3389                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3391                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3392                                 PIPE_CONFIG(ADDR_SURF_P2) |
3393                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3395                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3396                                 PIPE_CONFIG(ADDR_SURF_P2) |
3397                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3398                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3399                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400                                 PIPE_CONFIG(ADDR_SURF_P2) |
3401                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3403                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3404                                 PIPE_CONFIG(ADDR_SURF_P2));
3405                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3406                                 PIPE_CONFIG(ADDR_SURF_P2) |
3407                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3408                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3409                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410                                  PIPE_CONFIG(ADDR_SURF_P2) |
3411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3413                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414                                  PIPE_CONFIG(ADDR_SURF_P2) |
3415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3417                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3418                                  PIPE_CONFIG(ADDR_SURF_P2) |
3419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3421                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3422                                  PIPE_CONFIG(ADDR_SURF_P2) |
3423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3425                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3426                                  PIPE_CONFIG(ADDR_SURF_P2) |
3427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3429                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3430                                  PIPE_CONFIG(ADDR_SURF_P2) |
3431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3433                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3434                                  PIPE_CONFIG(ADDR_SURF_P2) |
3435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3437                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3438                                  PIPE_CONFIG(ADDR_SURF_P2) |
3439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3441                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3442                                  PIPE_CONFIG(ADDR_SURF_P2) |
3443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3445                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3446                                  PIPE_CONFIG(ADDR_SURF_P2) |
3447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3449                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3450                                  PIPE_CONFIG(ADDR_SURF_P2) |
3451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3453                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3454                                  PIPE_CONFIG(ADDR_SURF_P2) |
3455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3457                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3458                                  PIPE_CONFIG(ADDR_SURF_P2) |
3459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3461                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3462                                  PIPE_CONFIG(ADDR_SURF_P2) |
3463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3465                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3466                                  PIPE_CONFIG(ADDR_SURF_P2) |
3467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3469                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3470                                  PIPE_CONFIG(ADDR_SURF_P2) |
3471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3473                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3474                                  PIPE_CONFIG(ADDR_SURF_P2) |
3475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3477
3478                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3479                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3480                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3481                                 NUM_BANKS(ADDR_SURF_8_BANK));
3482                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3483                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3484                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3485                                 NUM_BANKS(ADDR_SURF_8_BANK));
3486                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3487                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3488                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3489                                 NUM_BANKS(ADDR_SURF_8_BANK));
3490                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3491                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3492                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3493                                 NUM_BANKS(ADDR_SURF_8_BANK));
3494                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3495                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3496                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3497                                 NUM_BANKS(ADDR_SURF_8_BANK));
3498                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3499                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3500                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3501                                 NUM_BANKS(ADDR_SURF_8_BANK));
3502                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3503                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3504                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3505                                 NUM_BANKS(ADDR_SURF_8_BANK));
3506                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3509                                 NUM_BANKS(ADDR_SURF_16_BANK));
3510                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3511                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3512                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3513                                 NUM_BANKS(ADDR_SURF_16_BANK));
3514                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3515                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3516                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3517                                  NUM_BANKS(ADDR_SURF_16_BANK));
3518                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3519                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3520                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3521                                  NUM_BANKS(ADDR_SURF_16_BANK));
3522                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3523                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3524                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3525                                  NUM_BANKS(ADDR_SURF_16_BANK));
3526                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3527                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3528                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3529                                  NUM_BANKS(ADDR_SURF_16_BANK));
3530                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3531                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3532                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3533                                  NUM_BANKS(ADDR_SURF_8_BANK));
3534
3535                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3536                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3537                             reg_offset != 23)
3538                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3539
3540                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3541                         if (reg_offset != 7)
3542                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3543
3544                 break;
3545         }
3546 }
3547
3548 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3549                                   u32 se_num, u32 sh_num, u32 instance)
3550 {
3551         u32 data;
3552
3553         if (instance == 0xffffffff)
3554                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3555         else
3556                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3557
3558         if (se_num == 0xffffffff)
3559                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3560         else
3561                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3562
3563         if (sh_num == 0xffffffff)
3564                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3565         else
3566                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3567
3568         WREG32(mmGRBM_GFX_INDEX, data);
3569 }
3570
3571 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3572 {
3573         return (u32)((1ULL << bit_width) - 1);
3574 }
3575
/*
 * Return a bitmap of the active (non-disabled) render backends for the
 * currently selected SE/SH, one bit per backend in this shader array.
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* OR together the fuse-level and user-level RB disable registers */
	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	/* width of the per-SH backend field */
	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* the registers hold "disabled" bits; invert to get the active map */
	return (~data) & mask;
}
3590
/*
 * Fill in the default PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
 * values for a fully populated (non-harvested) chip of this ASIC type.
 * The values are OR-ed into *rconf / *rconf1.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* single shader engine: no SE_PAIR fields needed */
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* single RB: the hardware defaults are fine */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3630
/*
 * Program per-SE PA_SC_RASTER_CONFIG(_1) when some render backends are
 * harvested.  For every SE/PKR/RB pair where one half is disabled, the
 * corresponding MAP field is redirected so work only lands on enabled
 * backends.  Called with grbm_idx_mutex held (see gfx_v8_0_setup_rb);
 * leaves GRBM_GFX_INDEX in broadcast mode on return.
 *
 * @raster_config/@raster_config_1: default (unharvested) config values
 * @rb_mask: bitmap of enabled RBs across the whole chip
 * @num_rb: total number of RBs (enabled or not)
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice rb_mask into one sub-mask per shader engine */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if an entire SE pair has no RBs, point SE_PAIR_MAP at the other */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* redirect SE_MAP away from a fully disabled SE */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* redirect PKR_MAP away from a fully disabled packer */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* RB pair in packer 0 */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* RB pair in packer 1 */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3739
/*
 * Detect which render backends survived harvesting, program the raster
 * configuration accordingly, and cache the per-SE/SH RB registers for
 * userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* number of bitmap bits contributed by each shader array */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* gather the active-RB bitmap across every SE/SH */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* return to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* nothing harvested: broadcast the default raster config */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		/* some RBs disabled: remap the raster config per SE */
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3796
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for the low (scratch) and high (LDS) halves */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* program every compute VMID (8..15) with the same apertures */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3841
3842 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3843 {
3844         u32 tmp;
3845         int i;
3846
3847         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3848         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3849         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3850         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3851
3852         gfx_v8_0_tiling_mode_table_init(adev);
3853         gfx_v8_0_setup_rb(adev);
3854         gfx_v8_0_get_cu_info(adev);
3855
3856         /* XXX SH_MEM regs */
3857         /* where to put LDS, scratch, GPUVM in FSA64 space */
3858         mutex_lock(&adev->srbm_mutex);
3859         for (i = 0; i < 16; i++) {
3860                 vi_srbm_select(adev, 0, 0, 0, i);
3861                 /* CP and shaders */
3862                 if (i == 0) {
3863                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3864                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3865                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3866                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3867                         WREG32(mmSH_MEM_CONFIG, tmp);
3868                 } else {
3869                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3870                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3871                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3872                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3873                         WREG32(mmSH_MEM_CONFIG, tmp);
3874                 }
3875
3876                 WREG32(mmSH_MEM_APE1_BASE, 1);
3877                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3878                 WREG32(mmSH_MEM_BASES, 0);
3879         }
3880         vi_srbm_select(adev, 0, 0, 0, 0);
3881         mutex_unlock(&adev->srbm_mutex);
3882
3883         gfx_v8_0_init_compute_vmid(adev);
3884
3885         mutex_lock(&adev->grbm_idx_mutex);
3886         /*
3887          * making sure that the following register writes will be broadcasted
3888          * to all the shaders
3889          */
3890         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3891
3892         WREG32(mmPA_SC_FIFO_SIZE,
3893                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3894                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3895                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3896                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3897                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3898                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3899                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3900                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3901         mutex_unlock(&adev->grbm_idx_mutex);
3902
3903 }
3904
/*
 * Poll until the RLC serdes CU masters (checked per SE/SH) and the
 * non-CU masters (SE/GC/TC0/TC1) report idle, giving up after
 * adev->usec_timeout microseconds per register.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	/* per-SE/SH CU master busy check */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* global (non-CU) master busy check */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3934
3935 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3936                                                bool enable)
3937 {
3938         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3939
3940         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3941         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3942         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3943         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3944
3945         WREG32(mmCP_INT_CNTL_RING0, tmp);
3946 }
3947
/*
 * Point the RLC at the clear-state indirect buffer (CSIB): program its
 * 64-bit GPU address (low bits forced to a 4-byte boundary) and size.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3958
3959 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3960                                 int ind_offset,
3961                                 int list_size,
3962                                 int *unique_indices,
3963                                 int *indices_count,
3964                                 int max_indices,
3965                                 int *ind_start_offsets,
3966                                 int *offset_count,
3967                                 int max_offset)
3968 {
3969         int indices;
3970         bool new_entry = true;
3971
3972         for (; ind_offset < list_size; ind_offset++) {
3973
3974                 if (new_entry) {
3975                         new_entry = false;
3976                         ind_start_offsets[*offset_count] = ind_offset;
3977                         *offset_count = *offset_count + 1;
3978                         BUG_ON(*offset_count >= max_offset);
3979                 }
3980
3981                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3982                         new_entry = true;
3983                         continue;
3984                 }
3985
3986                 ind_offset += 2;
3987
3988                 /* look for the matching indice */
3989                 for (indices = 0;
3990                         indices < *indices_count;
3991                         indices++) {
3992                         if (unique_indices[indices] ==
3993                                 register_list_format[ind_offset])
3994                                 break;
3995                 }
3996
3997                 if (indices >= *indices_count) {
3998                         unique_indices[*indices_count] =
3999                                 register_list_format[ind_offset];
4000                         indices = *indices_count;
4001                         *indices_count = *indices_count + 1;
4002                         BUG_ON(*indices_count >= max_indices);
4003                 }
4004
4005                 register_list_format[ind_offset] = indices;
4006         }
4007 }
4008
/*
 * Program the RLC save/restore machine with the register lists shipped in
 * the RLC firmware: the direct save/restore list, the indirect register
 * list, its per-range starting offsets and the set of unique indirect
 * register indices.
 *
 * Returns 0 on success, -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a scratch copy: parse_ind_reg_list rewrites entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	/* collect unique indirect indices and range start offsets, and
	 * replace each index in the copy by its position in unique_indices */
	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* ARAM address auto-increments after each DATA write */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is programmed in units of register pairs (dwords / 2) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* NOTE(review): low 18 bits go to the ADDR register,
			 * bits 20+ to the DATA register — packing presumably
			 * comes from the RLC ucode format; confirm against
			 * the firmware spec before touching the masks. */
			amdgpu_mm_wreg(adev, temp + i,
					unique_indices[i] & 0x3FFFF, false);
			amdgpu_mm_wreg(adev, data + i,
					unique_indices[i] >> 20, false);
		}
	}
	kfree(register_list_format);

	return 0;
}
4074
/* Turn on the RLC save/restore machine (must be programmed first via
 * gfx_v8_0_init_save_restore_list()). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4079
/* Program the static power-gating timing parameters (delays, idle
 * thresholds). Values are fixed magic numbers from the hardware teams. */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* all four PG delay fields use the same 0x10 value */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4096
/* Enable/disable SMU clock slow-down while a block is powering up (CZ). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4102
/* Enable/disable SMU clock slow-down while a block is powering down (CZ). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4108
/* Enable/disable CP power gating. Note the field is CP_PG_DISABLE, so the
 * polarity is inverted: enable -> write 0, disable -> write 1. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4113
4114 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4115 {
4116         if ((adev->asic_type == CHIP_CARRIZO) ||
4117             (adev->asic_type == CHIP_STONEY)) {
4118                 gfx_v8_0_init_csb(adev);
4119                 gfx_v8_0_init_save_restore_list(adev);
4120                 gfx_v8_0_enable_save_restore_machine(adev);
4121                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4122                 gfx_v8_0_init_power_gating(adev);
4123                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4124         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4125                    (adev->asic_type == CHIP_POLARIS12)) {
4126                 gfx_v8_0_init_csb(adev);
4127                 gfx_v8_0_init_save_restore_list(adev);
4128                 gfx_v8_0_enable_save_restore_machine(adev);
4129                 gfx_v8_0_init_power_gating(adev);
4130         }
4131
4132 }
4133
/* Halt the RLC F32 core, mask GUI idle interrupts and wait for the RLC
 * serdes to go idle before callers touch RLC state. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4141
/* Pulse the RLC soft reset: assert, settle, deassert, settle. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4150
/* Start the RLC F32 core and (on dGPUs) re-enable GUI idle interrupts. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4161
/*
 * Upload the RLC microcode image into the RLC GPM ucode RAM via the
 * ADDR/DATA register pair and latch the firmware version.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware was loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload follows the header at ucode_array_offset_bytes */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	/* final ADDR write records the fw version */
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4185
/*
 * Bring the RLC back up: stop it, disable clock/power gating, reset it,
 * re-init power gating, load its microcode (directly or via the SMU,
 * depending on the firmware-loading path) and start it again.
 *
 * Returns 0 on success or a negative error code from microcode loading.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	/* Polaris additionally has a 3D CGCG/CGLS control */
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4230
4231 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4232 {
4233         int i;
4234         u32 tmp = RREG32(mmCP_ME_CNTL);
4235
4236         if (enable) {
4237                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4238                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4239                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4240         } else {
4241                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4242                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4243                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4244                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4245                         adev->gfx.gfx_ring[i].ready = false;
4246         }
4247         WREG32(mmCP_ME_CNTL, tmp);
4248         udelay(50);
4249 }
4250
4251 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4252 {
4253         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4254         const struct gfx_firmware_header_v1_0 *ce_hdr;
4255         const struct gfx_firmware_header_v1_0 *me_hdr;
4256         const __le32 *fw_data;
4257         unsigned i, fw_size;
4258
4259         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4260                 return -EINVAL;
4261
4262         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4263                 adev->gfx.pfp_fw->data;
4264         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4265                 adev->gfx.ce_fw->data;
4266         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4267                 adev->gfx.me_fw->data;
4268
4269         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4270         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4271         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4272
4273         gfx_v8_0_cp_gfx_enable(adev, false);
4274
4275         /* PFP */
4276         fw_data = (const __le32 *)
4277                 (adev->gfx.pfp_fw->data +
4278                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4279         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4280         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4281         for (i = 0; i < fw_size; i++)
4282                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4283         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4284
4285         /* CE */
4286         fw_data = (const __le32 *)
4287                 (adev->gfx.ce_fw->data +
4288                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4289         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4290         WREG32(mmCP_CE_UCODE_ADDR, 0);
4291         for (i = 0; i < fw_size; i++)
4292                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4293         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4294
4295         /* ME */
4296         fw_data = (const __le32 *)
4297                 (adev->gfx.me_fw->data +
4298                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4299         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4300         WREG32(mmCP_ME_RAM_WADDR, 0);
4301         for (i = 0; i < fw_size; i++)
4302                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4303         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4304
4305         return 0;
4306 }
4307
4308 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4309 {
4310         u32 count = 0;
4311         const struct cs_section_def *sect = NULL;
4312         const struct cs_extent_def *ext = NULL;
4313
4314         /* begin clear state */
4315         count += 2;
4316         /* context control state */
4317         count += 3;
4318
4319         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4320                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4321                         if (sect->id == SECT_CONTEXT)
4322                                 count += 2 + ext->reg_count;
4323                         else
4324                                 return 0;
4325                 }
4326         }
4327         /* pa_sc_raster_config/pa_sc_raster_config1 */
4328         count += 4;
4329         /* end clear state */
4330         count += 2;
4331         /* clear state */
4332         count += 2;
4333
4334         return count;
4335 }
4336
/*
 * Initialize the CP and push the clear-state sequence down the gfx ring:
 * preamble, context control, the vi_cs_data context registers, the
 * per-ASIC raster config, the CLEAR_STATE packet and the CE partition
 * bases. The packet count must match gfx_v8_0_get_csb_size() + 4.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the SET_BASE packet appended after the clear state */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4429
/*
 * Bring up the gfx ring buffer: program RB0 size/pointers/writeback
 * addresses and doorbell routing, then kick the CP clear-state sequence
 * and ring-test the ring.
 *
 * Returns 0 on success or the ring-test error; on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers
	 * (RPTR_WR_ENA lets the WPTR=0 write also reset the RPTR) */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* drop RPTR_WR_ENA again now the pointers are reset */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4513
/* Halt or un-halt both compute micro-engines (MEC ME1/ME2). When halting,
 * also mark every compute ring as not ready so nothing submits. */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
	}
	udelay(50);
}
4527
/*
 * Upload the MEC1 microcode (and MEC2's, when a separate image is
 * provided) into the compute micro-engines.
 *
 * Returns 0 on success, -EINVAL if the MEC firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4573
4574 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4575 {
4576         int i, r;
4577
4578         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4579                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4580
4581                 if (ring->mqd_obj) {
4582                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4583                         if (unlikely(r != 0))
4584                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4585
4586                         amdgpu_bo_unpin(ring->mqd_obj);
4587                         amdgpu_bo_unreserve(ring->mqd_obj);
4588
4589                         amdgpu_bo_unref(&ring->mqd_obj);
4590                         ring->mqd_obj = NULL;
4591                 }
4592         }
4593 }
4594
4595 /* KIQ functions */
/* Tell the RLC which me/pipe/queue is the KIQ. The queue selector is
 * written first, then rewritten with bit 0x80 set to activate it. */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4609
/* Submit a SET_RESOURCES packet on the KIQ ring claiming the low eight
 * compute queues for the KIQ scheduler.
 * NOTE(review): amdgpu_ring_alloc()'s return value is ignored here;
 * a failed allocation would make the subsequent writes invalid. */
static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
{
	amdgpu_ring_alloc(ring, 8);
	/* set resources */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(ring, 0);	/* queue mask hi */
	amdgpu_ring_write(ring, 0);	/* gws mask lo */
	amdgpu_ring_write(ring, 0);	/* gws mask hi */
	amdgpu_ring_write(ring, 0);	/* oac mask */
	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
	amdgpu_ring_commit(ring);
	udelay(50);
}
4625
/* Submit a MAP_QUEUES packet on the KIQ ring to map @ring's queue,
 * pointing the HW at its MQD and write-pointer polling address.
 * NOTE(review): amdgpu_ring_alloc()'s return value is ignored here. */
static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, 0x21010000);
	/* doorbell offset plus queue/pipe/me selection (me 1 -> 0, else 1) */
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
			(ring->queue << 26) |
			(ring->pipe << 29) |
			((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
	udelay(50);
}
4650
/*
 * Fill in the memory queue descriptor (MQD) for a compute ring: header,
 * EOP buffer, HQD base/size/control, writeback addresses and doorbell
 * routing. The MQD is what MAP_QUEUES (or direct HQD programming) later
 * loads into the hardware queue registers. Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
			     struct vi_mqd *mqd,
			     uint64_t mqd_gpu_addr,
			     uint64_t eop_gpu_addr,
			     struct amdgpu_ring *ring)
{
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	/* EOP base is stored in units of 256 bytes */
	eop_base_addr = eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4770
/*
 * gfx_v8_0_kiq_init_register() - program the KIQ's HQD registers from @mqd.
 *
 * Copies the previously initialised MQD fields into the per-queue CP_HQD_*
 * registers and activates the queue.  The caller must already have selected
 * the target me/pipe/queue via vi_srbm_select() and must hold srbm_mutex
 * (see gfx_v8_0_kiq_init_queue()).
 *
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
				      struct vi_mqd *mqd,
				      struct amdgpu_ring *ring)
{
	uint32_t tmp;
	int j;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active: request a dequeue, then poll
	 * CP_HQD_ACTIVE for up to adev->usec_timeout microseconds */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested; on these ASICs the MEC doorbell
	 * aperture is also (re)programmed here.
	 * NOTE(review): unlike gfx_v8_0_cp_compute_resume(), the Polaris
	 * variants are not listed here — confirm whether that is intended. */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
				(adev->asic_type == CHIP_FIJI) ||
				(adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
						AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
						AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	return 0;
}
4861
4862 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4863                                    struct vi_mqd *mqd,
4864                                    u64 mqd_gpu_addr)
4865 {
4866         struct amdgpu_device *adev = ring->adev;
4867         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4868         uint64_t eop_gpu_addr;
4869         bool is_kiq = false;
4870
4871         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4872                 is_kiq = true;
4873
4874         if (is_kiq) {
4875                 eop_gpu_addr = kiq->eop_gpu_addr;
4876                 gfx_v8_0_kiq_setting(&kiq->ring);
4877         } else
4878                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4879                                         ring->queue * MEC_HPD_SIZE;
4880
4881         mutex_lock(&adev->srbm_mutex);
4882         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4883
4884         gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4885
4886         if (is_kiq)
4887                 gfx_v8_0_kiq_init_register(adev, mqd, ring);
4888
4889         vi_srbm_select(adev, 0, 0, 0, 0);
4890         mutex_unlock(&adev->srbm_mutex);
4891
4892         if (is_kiq)
4893                 gfx_v8_0_kiq_enable(ring);
4894         else
4895                 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4896
4897         return 0;
4898 }
4899
4900 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
4901 {
4902         struct amdgpu_ring *ring = NULL;
4903         int i;
4904
4905         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4906                 ring = &adev->gfx.compute_ring[i];
4907                 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4908                 ring->mqd_obj = NULL;
4909         }
4910
4911         ring = &adev->gfx.kiq.ring;
4912         amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4913         ring->mqd_obj = NULL;
4914 }
4915
4916 static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
4917                                     struct amdgpu_ring *ring)
4918 {
4919         struct vi_mqd *mqd;
4920         u64 mqd_gpu_addr;
4921         u32 *buf;
4922         int r = 0;
4923
4924         r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
4925                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
4926                                     &mqd_gpu_addr, (void **)&buf);
4927         if (r) {
4928                 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
4929                 return r;
4930         }
4931
4932         /* init the mqd struct */
4933         memset(buf, 0, sizeof(struct vi_mqd));
4934         mqd = (struct vi_mqd *)buf;
4935
4936         r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
4937         if (r)
4938                 return r;
4939
4940         amdgpu_bo_kunmap(ring->mqd_obj);
4941
4942         return 0;
4943 }
4944
4945 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4946 {
4947         struct amdgpu_ring *ring = NULL;
4948         int r, i;
4949
4950         ring = &adev->gfx.kiq.ring;
4951         r = gfx_v8_0_kiq_setup_queue(adev, ring);
4952         if (r)
4953                 return r;
4954
4955         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4956                 ring = &adev->gfx.compute_ring[i];
4957                 r = gfx_v8_0_kiq_setup_queue(adev, ring);
4958                 if (r)
4959                         return r;
4960         }
4961
4962         gfx_v8_0_cp_compute_enable(adev, true);
4963
4964         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4965                 ring = &adev->gfx.compute_ring[i];
4966
4967                 ring->ready = true;
4968                 r = amdgpu_ring_test_ring(ring);
4969                 if (r)
4970                         ring->ready = false;
4971         }
4972
4973         ring = &adev->gfx.kiq.ring;
4974         ring->ready = true;
4975         r = amdgpu_ring_test_ring(ring);
4976         if (r)
4977                 ring->ready = false;
4978
4979         return 0;
4980 }
4981
/*
 * gfx_v8_0_cp_compute_resume() - legacy (non-KIQ) compute queue bring-up.
 *
 * For every compute ring: allocate/pin/map its MQD buffer object, fill in
 * the MQD while mirroring each value into the CP_HQD_* registers under
 * SRBM selection, then activate the queue.  Afterwards the MEC is enabled
 * and every ring is ring-tested (a failed test only clears ring->ready).
 *
 * Returns 0 on success or a negative error code from BO setup; on BO
 * errors all compute MQDs are torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily allocate the MQD BO; it persists across resumes */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* MQD header/static fields; presumably CP-defined magic
		 * values — TODO confirm against the vi_structs.h definition */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all CP_HQD_* accesses below target this ring's queue */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* EOP base is a 256-byte-aligned address, hence the >> 8 */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* read the programmed values back into the MQD */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active: request dequeue and poll
		 * CP_HQD_ACTIVE for up to adev->usec_timeout microseconds */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested; the MEC doorbell aperture
		 * is programmed once per listed ASIC before the per-queue bits */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		/* extra per-ASIC interrupt enable on ME1 pipe 3 */
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10 ||
			adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable the CP doorbell path */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test each compute ring; failures only clear ring->ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5231
5232 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5233 {
5234         int r;
5235
5236         if (!(adev->flags & AMD_IS_APU))
5237                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5238
5239         if (!adev->pp_enabled) {
5240                 if (!adev->firmware.smu_load) {
5241                         /* legacy firmware loading */
5242                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5243                         if (r)
5244                                 return r;
5245
5246                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5247                         if (r)
5248                                 return r;
5249                 } else {
5250                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5251                                                         AMDGPU_UCODE_ID_CP_CE);
5252                         if (r)
5253                                 return -EINVAL;
5254
5255                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5256                                                         AMDGPU_UCODE_ID_CP_PFP);
5257                         if (r)
5258                                 return -EINVAL;
5259
5260                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5261                                                         AMDGPU_UCODE_ID_CP_ME);
5262                         if (r)
5263                                 return -EINVAL;
5264
5265                         if (adev->asic_type == CHIP_TOPAZ) {
5266                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5267                                 if (r)
5268                                         return r;
5269                         } else {
5270                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5271                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5272                                 if (r)
5273                                         return -EINVAL;
5274                         }
5275                 }
5276         }
5277
5278         r = gfx_v8_0_cp_gfx_resume(adev);
5279         if (r)
5280                 return r;
5281
5282         if (amdgpu_sriov_vf(adev))
5283                 r = gfx_v8_0_kiq_resume(adev);
5284         else
5285                 r = gfx_v8_0_cp_compute_resume(adev);
5286         if (r)
5287                 return r;
5288
5289         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5290
5291         return 0;
5292 }
5293
/* Enable or disable both CP front-ends (GFX and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5299
/*
 * gfx_v8_0_hw_init() - IP-block hw_init hook.
 *
 * Programs the golden registers, initialises the GPU core, then brings up
 * the RLC followed by the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5316
5317 static int gfx_v8_0_hw_fini(void *handle)
5318 {
5319         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5320
5321         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5322         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5323         if (amdgpu_sriov_vf(adev)) {
5324                 gfx_v8_0_kiq_free_queue(adev);
5325                 pr_debug("For SRIOV client, shouldn't do anything.\n");
5326                 return 0;
5327         }
5328         gfx_v8_0_cp_enable(adev, false);
5329         gfx_v8_0_rlc_stop(adev);
5330         gfx_v8_0_cp_compute_fini(adev);
5331
5332         amdgpu_set_powergating_state(adev,
5333                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5334
5335         return 0;
5336 }
5337
/* Suspend is simply a full hw_fini for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5344
/* Resume re-runs the full hw_init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5351
5352 static bool gfx_v8_0_is_idle(void *handle)
5353 {
5354         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5355
5356         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5357                 return false;
5358         else
5359                 return true;
5360 }
5361
5362 static int gfx_v8_0_wait_for_idle(void *handle)
5363 {
5364         unsigned i;
5365         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5366
5367         for (i = 0; i < adev->usec_timeout; i++) {
5368                 if (gfx_v8_0_is_idle(handle))
5369                         return 0;
5370
5371                 udelay(1);
5372         }
5373         return -ETIMEDOUT;
5374 }
5375
/*
 * gfx_v8_0_check_soft_reset() - decide whether the GFX block needs a soft
 * reset and, if so, which reset bits to assert.
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for stuck busy/pending
 * conditions and accumulates the matching GRBM/SRBM soft-reset field masks.
 * The masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the later
 * pre_soft_reset/soft_reset stages.
 *
 * Returns true when any reset bit was set, false when the block looks clean.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy engine implies a CP/GFX (and GRBM) reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy → RLC reset */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any CP fetcher/compute/gfx micro-engine busy → reset all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or a busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* publish the result for pre_soft_reset()/soft_reset() */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5437
5438 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5439                                   struct amdgpu_ring *ring)
5440 {
5441         int i;
5442
5443         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5444         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5445                 u32 tmp;
5446                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5447                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5448                                     DEQUEUE_REQ, 2);
5449                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5450                 for (i = 0; i < adev->usec_timeout; i++) {
5451                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5452                                 break;
5453                         udelay(1);
5454                 }
5455         }
5456 }
5457
5458 static int gfx_v8_0_pre_soft_reset(void *handle)
5459 {
5460         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5461         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5462
5463         if ((!adev->gfx.grbm_soft_reset) &&
5464             (!adev->gfx.srbm_soft_reset))
5465                 return 0;
5466
5467         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5468         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5469
5470         /* stop the rlc */
5471         gfx_v8_0_rlc_stop(adev);
5472
5473         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5474             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5475                 /* Disable GFX parsing/prefetching */
5476                 gfx_v8_0_cp_gfx_enable(adev, false);
5477
5478         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5479             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5480             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5481             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5482                 int i;
5483
5484                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5485                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5486
5487                         gfx_v8_0_inactive_hqd(adev, ring);
5488                 }
5489                 /* Disable MEC parsing/prefetching */
5490                 gfx_v8_0_cp_compute_enable(adev, false);
5491         }
5492
5493        return 0;
5494 }
5495
/**
 * gfx_v8_0_soft_reset - pulse the latched GRBM/SRBM soft-reset bits
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Asserts and then de-asserts the soft-reset bits recorded in
 * adev->gfx.grbm_soft_reset / adev->gfx.srbm_soft_reset, bracketing the
 * pulse with a GMCON GFX stall/clear so the memory controller path is
 * quiet while the reset is in flight.
 *
 * Returns 0, also when no reset bits were latched.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* Nothing latched by the soft-reset check: nothing to do. */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* Stall and clear the GFX path in GMCON before pulsing reset. */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back, presumably to flush the posted write — keep */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		/* hold reset asserted briefly */
		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* hold reset asserted briefly */
		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* Release the GMCON GFX stall/clear set up above. */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5557
/* Reset the HQD state for @ring: select its me/pipe/queue through SRBM,
 * clear any pending dequeue request and zero the PQ read/write pointers,
 * then restore the default SRBM selection. */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5567
/**
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): based on the same latched
 * grbm/srbm soft-reset bits, resumes the CP GFX ring, re-initializes
 * every compute HQD and resumes the MEC, then restarts the RLC.
 *
 * Returns 0, also when no reset bits were latched.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* Nothing was reset: nothing to restore. */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* Re-initialize each compute HQD before resuming the MEC. */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5601
5602 /**
5603  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5604  *
5605  * @adev: amdgpu_device pointer
5606  *
5607  * Fetches a GPU clock counter snapshot.
5608  * Returns the 64 bit clock counter snapshot.
5609  */
5610 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5611 {
5612         uint64_t clock;
5613
5614         mutex_lock(&adev->gfx.gpu_clock_mutex);
5615         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5616         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5617                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5618         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5619         return clock;
5620 }
5621
/**
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA setup packets for a VMID
 *
 * @ring: ring to emit on
 * @vmid: VMID whose GDS registers are programmed
 * @gds_base/@gds_size: GDS allocation (bytes, scaled by AMDGPU_GDS_SHIFT)
 * @gws_base/@gws_size: GWS allocation (scaled by AMDGPU_GWS_SHIFT)
 * @oa_base/@oa_size: OA allocation (scaled by AMDGPU_OA_SHIFT)
 *
 * Emits four WRITE_DATA packets programming the per-VMID GDS base/size,
 * GWS and OA registers from amdgpu_gds_reg_offset[vmid].
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* Scale byte-based values down to the hardware granularity. */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one dword */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous mask of oa_size bits starting at bit oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5669
5670 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5671 {
5672         WREG32(mmSQ_IND_INDEX,
5673                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5674                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5675                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5676                 (SQ_IND_INDEX__FORCE_READ_MASK));
5677         return RREG32(mmSQ_IND_DATA);
5678 }
5679
5680 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5681                            uint32_t wave, uint32_t thread,
5682                            uint32_t regno, uint32_t num, uint32_t *out)
5683 {
5684         WREG32(mmSQ_IND_INDEX,
5685                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5686                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5687                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5688                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5689                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5690                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5691         while (num--)
5692                 *(out++) = RREG32(mmSQ_IND_DATA);
5693 }
5694
5695 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5696 {
5697         /* type 0 wave data */
5698         dst[(*no_fields)++] = 0;
5699         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5700         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5701         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5702         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5703         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5704         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5705         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5706         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5707         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5708         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5709         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5710         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5711         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5712         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5713         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5714         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5715         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5716         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5717 }
5718
5719 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5720                                      uint32_t wave, uint32_t start,
5721                                      uint32_t size, uint32_t *dst)
5722 {
5723         wave_read_regs(
5724                 adev, simd, wave, 0,
5725                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5726 }
5727
5728
5729 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5730         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5731         .select_se_sh = &gfx_v8_0_select_se_sh,
5732         .read_wave_data = &gfx_v8_0_read_wave_data,
5733         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5734 };
5735
/* Early init: record the fixed gfx v8 ring counts and install the
 * gfx/ring/irq/gds/rlc callback tables on @adev.  Always returns 0. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5750
5751 static int gfx_v8_0_late_init(void *handle)
5752 {
5753         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5754         int r;
5755
5756         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5757         if (r)
5758                 return r;
5759
5760         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5761         if (r)
5762                 return r;
5763
5764         /* requires IBs so do in late init after IB pool is initialized */
5765         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5766         if (r)
5767                 return r;
5768
5769         amdgpu_set_powergating_state(adev,
5770                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5771
5772         return 0;
5773 }
5774
5775 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5776                                                        bool enable)
5777 {
5778         if ((adev->asic_type == CHIP_POLARIS11) ||
5779             (adev->asic_type == CHIP_POLARIS12))
5780                 /* Send msg to SMU via Powerplay */
5781                 amdgpu_set_powergating_state(adev,
5782                                              AMD_IP_BLOCK_TYPE_SMC,
5783                                              enable ?
5784                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5785
5786         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5787 }
5788
/* Enable/disable dynamic per-CU medium-grain power gating in the RLC. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5794
/* Enable/disable quick medium-grain power gating (Polaris11/12 only,
 * see the caller's asic_type switch). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5800
/* Enable/disable coarse-grain GFX power gating in the RLC. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5806
/* Enable/disable GFX pipeline power gating.  On disable, a dummy GFX
 * register read follows the write to wake the block back up. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5816
5817 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5818                                           bool enable)
5819 {
5820         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5821                 cz_enable_gfx_cg_power_gating(adev, true);
5822                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5823                         cz_enable_gfx_pipeline_power_gating(adev, true);
5824         } else {
5825                 cz_enable_gfx_cg_power_gating(adev, false);
5826                 cz_enable_gfx_pipeline_power_gating(adev, false);
5827         }
5828 }
5829
5830 static int gfx_v8_0_set_powergating_state(void *handle,
5831                                           enum amd_powergating_state state)
5832 {
5833         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5834         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5835
5836         switch (adev->asic_type) {
5837         case CHIP_CARRIZO:
5838         case CHIP_STONEY:
5839
5840                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5841                         cz_enable_sck_slow_down_on_power_up(adev, true);
5842                         cz_enable_sck_slow_down_on_power_down(adev, true);
5843                 } else {
5844                         cz_enable_sck_slow_down_on_power_up(adev, false);
5845                         cz_enable_sck_slow_down_on_power_down(adev, false);
5846                 }
5847                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5848                         cz_enable_cp_power_gating(adev, true);
5849                 else
5850                         cz_enable_cp_power_gating(adev, false);
5851
5852                 cz_update_gfx_cg_power_gating(adev, enable);
5853
5854                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5855                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5856                 else
5857                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5858
5859                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5860                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5861                 else
5862                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5863                 break;
5864         case CHIP_POLARIS11:
5865         case CHIP_POLARIS12:
5866                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5867                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5868                 else
5869                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5870
5871                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5872                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5873                 else
5874                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5875
5876                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5877                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5878                 else
5879                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5880                 break;
5881         default:
5882                 break;
5883         }
5884
5885         return 0;
5886 }
5887
/**
 * gfx_v8_0_get_clockgating_state - report which CG features are active
 *
 * @handle: amdgpu_device pointer (as void *)
 * @flags: output; AMD_CG_SUPPORT_* bits are OR'ed in for every clock
 *         gating feature currently enabled in hardware
 *
 * Reads the CG control registers and translates their state into flag
 * bits.  Note that "override" bits are active-low from the feature's
 * point of view: the feature is on when the override bit is clear.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG above) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5926
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command to all CUs
 *
 * @reg_addr: BPM register address placed in the REG_ADDR field
 * @cmd: command value placed in the BPM_DATA field
 *
 * Selects all SEs/SHs, sets both CU and non-CU master masks to
 * broadcast, clears the command/select bits of RLC_SERDES_WR_CTRL and
 * writes the new command.  Stoney leaves BPM_DATA/REG_ADDR out of the
 * clear mask — presumably intentional for that part; the fields are
 * overwritten by the OR below anyway.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* 0xffffffff selects every SE/SH (broadcast). */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5967
/* RLC safe-mode message values and RLC_GPR_REG2 field definitions.
 * NOTE(review): none of these appear to be referenced by the code in
 * this region (the iceland_* helpers use RLC_SAFE_MODE__* masks
 * instead) — confirm against the rest of the file before removing. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5974
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode before CG changes
 *
 * No-op when the RLC F32 core is disabled, or when neither CGCG nor
 * MGCG is enabled in adev->cg_flags.  Otherwise writes the safe-mode
 * request (CMD + MESSAGE=1) to mmRLC_SAFE_MODE, then busy-waits (bounded
 * by adev->usec_timeout) first for the GFX clock/power status bits and
 * then for the CMD bit to clear (the ack), finally recording the state
 * in adev->gfx.rlc.in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): 'data' was read from RLC_CNTL but is
		 * written to RLC_SAFE_MODE below — confirm this carry-over
		 * of RLC_CNTL bits is intended. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power status to assert */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the command (CMD bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
6008
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode after CG changes
 *
 * Counterpart of iceland_enter_rlc_safe_mode().  No-op when the RLC F32
 * core is disabled.  Writes the exit message (CMD with MESSAGE=0) only
 * if safe mode was previously entered, then polls for the CMD ack.
 * Note the final poll runs unconditionally, even when no message was
 * sent — harmless, since CMD is then already clear.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* MESSAGE=0 requests exit; CMD triggers it */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack (CMD bit clears) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
6033
/* RLC safe-mode hooks used by the clock/power-gating update paths. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
6038
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG (and related
 * MGLS/CGTS features) under RLC safe mode
 *
 * @enable: true to enable medium-grain clock gating (subject to
 *          adev->cg_flags), false to force it all off
 *
 * The numbered step comments below reflect the required hardware
 * programming order; do not reorder the register accesses.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides (APUs keep
		 * the GRBM override set) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6142
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS under
 * RLC safe mode
 *
 * @enable: true to enable coarse-grain clock gating (and CGLS when
 *          adev->cg_flags allows), false to force both off
 *
 * The register access order below is the required hardware sequence;
 * do not reorder.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override in RLC_CGTT_MGCG_OVERRIDE */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (multiple reads on purpose) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6233 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6234                                             bool enable)
6235 {
6236         if (enable) {
6237                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6238                  * ===  MGCG + MGLS + TS(CG/LS) ===
6239                  */
6240                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6241                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6242         } else {
6243                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6244                  * ===  CGCG + CGLS ===
6245                  */
6246                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6247                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6248         }
6249         return 0;
6250 }
6251
6252 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6253                                           enum amd_clockgating_state state)
6254 {
6255         uint32_t msg_id, pp_state = 0;
6256         uint32_t pp_support_state = 0;
6257         void *pp_handle = adev->powerplay.pp_handle;
6258
6259         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6260                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6261                         pp_support_state = PP_STATE_SUPPORT_LS;
6262                         pp_state = PP_STATE_LS;
6263                 }
6264                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6265                         pp_support_state |= PP_STATE_SUPPORT_CG;
6266                         pp_state |= PP_STATE_CG;
6267                 }
6268                 if (state == AMD_CG_STATE_UNGATE)
6269                         pp_state = 0;
6270
6271                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6272                                 PP_BLOCK_GFX_CG,
6273                                 pp_support_state,
6274                                 pp_state);
6275                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6276         }
6277
6278         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6279                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6280                         pp_support_state = PP_STATE_SUPPORT_LS;
6281                         pp_state = PP_STATE_LS;
6282                 }
6283
6284                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6285                         pp_support_state |= PP_STATE_SUPPORT_CG;
6286                         pp_state |= PP_STATE_CG;
6287                 }
6288
6289                 if (state == AMD_CG_STATE_UNGATE)
6290                         pp_state = 0;
6291
6292                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6293                                 PP_BLOCK_GFX_MG,
6294                                 pp_support_state,
6295                                 pp_state);
6296                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6297         }
6298
6299         return 0;
6300 }
6301
6302 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6303                                           enum amd_clockgating_state state)
6304 {
6305
6306         uint32_t msg_id, pp_state = 0;
6307         uint32_t pp_support_state = 0;
6308         void *pp_handle = adev->powerplay.pp_handle;
6309
6310         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6311                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6312                         pp_support_state = PP_STATE_SUPPORT_LS;
6313                         pp_state = PP_STATE_LS;
6314                 }
6315                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6316                         pp_support_state |= PP_STATE_SUPPORT_CG;
6317                         pp_state |= PP_STATE_CG;
6318                 }
6319                 if (state == AMD_CG_STATE_UNGATE)
6320                         pp_state = 0;
6321
6322                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6323                                 PP_BLOCK_GFX_CG,
6324                                 pp_support_state,
6325                                 pp_state);
6326                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6327         }
6328
6329         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6330                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6331                         pp_support_state = PP_STATE_SUPPORT_LS;
6332                         pp_state = PP_STATE_LS;
6333                 }
6334                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6335                         pp_support_state |= PP_STATE_SUPPORT_CG;
6336                         pp_state |= PP_STATE_CG;
6337                 }
6338                 if (state == AMD_CG_STATE_UNGATE)
6339                         pp_state = 0;
6340
6341                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6342                                 PP_BLOCK_GFX_3D,
6343                                 pp_support_state,
6344                                 pp_state);
6345                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6346         }
6347
6348         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6349                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6350                         pp_support_state = PP_STATE_SUPPORT_LS;
6351                         pp_state = PP_STATE_LS;
6352                 }
6353
6354                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6355                         pp_support_state |= PP_STATE_SUPPORT_CG;
6356                         pp_state |= PP_STATE_CG;
6357                 }
6358
6359                 if (state == AMD_CG_STATE_UNGATE)
6360                         pp_state = 0;
6361
6362                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6363                                 PP_BLOCK_GFX_MG,
6364                                 pp_support_state,
6365                                 pp_state);
6366                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6367         }
6368
6369         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6370                 pp_support_state = PP_STATE_SUPPORT_LS;
6371
6372                 if (state == AMD_CG_STATE_UNGATE)
6373                         pp_state = 0;
6374                 else
6375                         pp_state = PP_STATE_LS;
6376
6377                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6378                                 PP_BLOCK_GFX_RLC,
6379                                 pp_support_state,
6380                                 pp_state);
6381                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6382         }
6383
6384         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6385                 pp_support_state = PP_STATE_SUPPORT_LS;
6386
6387                 if (state == AMD_CG_STATE_UNGATE)
6388                         pp_state = 0;
6389                 else
6390                         pp_state = PP_STATE_LS;
6391                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6392                         PP_BLOCK_GFX_CP,
6393                         pp_support_state,
6394                         pp_state);
6395                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6396         }
6397
6398         return 0;
6399 }
6400
6401 static int gfx_v8_0_set_clockgating_state(void *handle,
6402                                           enum amd_clockgating_state state)
6403 {
6404         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6405
6406         switch (adev->asic_type) {
6407         case CHIP_FIJI:
6408         case CHIP_CARRIZO:
6409         case CHIP_STONEY:
6410                 gfx_v8_0_update_gfx_clock_gating(adev,
6411                                                  state == AMD_CG_STATE_GATE ? true : false);
6412                 break;
6413         case CHIP_TONGA:
6414                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6415                 break;
6416         case CHIP_POLARIS10:
6417         case CHIP_POLARIS11:
6418                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6419                 break;
6420         default:
6421                 break;
6422         }
6423         return 0;
6424 }
6425
6426 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6427 {
6428         return ring->adev->wb.wb[ring->rptr_offs];
6429 }
6430
6431 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6432 {
6433         struct amdgpu_device *adev = ring->adev;
6434
6435         if (ring->use_doorbell)
6436                 /* XXX check if swapping is necessary on BE */
6437                 return ring->adev->wb.wb[ring->wptr_offs];
6438         else
6439                 return RREG32(mmCP_RB0_WPTR);
6440 }
6441
6442 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6443 {
6444         struct amdgpu_device *adev = ring->adev;
6445
6446         if (ring->use_doorbell) {
6447                 /* XXX check if swapping is necessary on BE */
6448                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6449                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6450         } else {
6451                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6452                 (void)RREG32(mmCP_RB0_WPTR);
6453         }
6454 }
6455
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and polls
 * GPU_HDP_FLUSH_DONE until the engine-specific done bit matches,
 * so subsequent ring work sees the flushed data.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
        u32 ref_and_mask, reg_mem_engine;

        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
                /* compute/KIQ: the done bit depends on which ME/pipe we are */
                switch (ring->me) {
                case 1:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
                        break;
                case 2:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
                        break;
                default:
                        /* unexpected ME id: nothing safe to emit */
                        return;
                }
                reg_mem_engine = 0;
        } else {
                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 reg_mem_engine));
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);  /* register written to request */
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); /* register polled for completion */
        amdgpu_ring_write(ring, ref_and_mask);         /* reference value */
        amdgpu_ring_write(ring, ref_and_mask);         /* compare mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6488
/* Emit the two EVENT_WRITE packets that flush the VGT: a VS_PARTIAL_FLUSH
 * first, then the VGT_FLUSH event itself.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6499
6500
/* Invalidate the HDP cache by emitting a WRITE_DATA packet that writes 1
 * to the HDP_DEBUG0 register.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0) |
                                 WR_CONFIRM));
        amdgpu_ring_write(ring, mmHDP_DEBUG0); /* destination register */
        amdgpu_ring_write(ring, 0);            /* high half of destination */
        amdgpu_ring_write(ring, 1);            /* value written to trigger the invalidate */

}
6512
6513 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6514                                       struct amdgpu_ib *ib,
6515                                       unsigned vm_id, bool ctx_switch)
6516 {
6517         u32 header, control = 0;
6518
6519         if (ib->flags & AMDGPU_IB_FLAG_CE)
6520                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6521         else
6522                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6523
6524         control |= ib->length_dw | (vm_id << 24);
6525
6526         amdgpu_ring_write(ring, header);
6527         amdgpu_ring_write(ring,
6528 #ifdef __BIG_ENDIAN
6529                           (2 << 0) |
6530 #endif
6531                           (ib->gpu_addr & 0xFFFFFFFC));
6532         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6533         amdgpu_ring_write(ring, control);
6534 }
6535
6536 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6537                                           struct amdgpu_ib *ib,
6538                                           unsigned vm_id, bool ctx_switch)
6539 {
6540         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6541
6542         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6543         amdgpu_ring_write(ring,
6544 #ifdef __BIG_ENDIAN
6545                                 (2 << 0) |
6546 #endif
6547                                 (ib->gpu_addr & 0xFFFFFFFC));
6548         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6549         amdgpu_ring_write(ring, control);
6550 }
6551
/* Emit a gfx-ring fence: an EVENT_WRITE_EOP packet that flushes the
 * TC/TCL1 caches, writes @seq to @addr at end-of-pipe and optionally
 * raises an interrupt.
 *
 * @addr:  GPU address the fence value is written to (dword aligned)
 * @seq:   fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write,
 *         AMDGPU_FENCE_FLAG_INT requests the EOP interrupt
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        /* high address bits share a dword with the data/int selects */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6572
/* Stall the ring until its last synced fence value (fence_drv.sync_seq)
 * appears at fence_drv.gpu_addr.  Gfx rings wait on the PFP so command
 * fetch stalls too; compute rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq);        /* reference value */
        amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6589
/* Emit a VM flush on the ring: install the new page directory for @vm_id,
 * invalidate that VM context's TLB and wait for the invalidate to finish.
 * On gfx rings the PFP is then synced to the ME so prefetched commands
 * can't use the stale translation.
 *
 * @vm_id:   VM context (0-15); contexts 0-7 and 8-15 use separate
 *           register banks
 * @pd_addr: page directory address (programmed as pd_addr >> 12)
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vm_id, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        /* 1 - update the page table base for this VM context */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)) |
                                 WR_CONFIRM);
        if (vm_id < 8) {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
        } else {
                amdgpu_ring_write(ring,
                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
        }
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, pd_addr >> 12);

        /* bits 0-15 are the VM contexts0-15 */
        /* invalidate the cache */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 1 << vm_id);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
                /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
                amdgpu_ring_insert_nop(ring, 128);
        }
}
6638
6639 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6640 {
6641         return ring->adev->wb.wb[ring->wptr_offs];
6642 }
6643
6644 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6645 {
6646         struct amdgpu_device *adev = ring->adev;
6647
6648         /* XXX check if swapping is necessary on BE */
6649         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6650         WDOORBELL32(ring->doorbell_index, ring->wptr);
6651 }
6652
/* Emit a compute-ring fence: a RELEASE_MEM packet that flushes the
 * TC/TCL1 caches, writes @seq to @addr and optionally raises an
 * interrupt.  Flag meanings match gfx_v8_0_ring_emit_fence_gfx(); note
 * the dword layout differs (selects first, then the full address).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6673
/* Emit a fence on the KIQ ring.
 *
 * KIQ fences use plain WRITE_DATA packets rather than an EOP event: the
 * 32-bit seq is written to @addr, and if AMDGPU_FENCE_FLAG_INT is set a
 * second write to CPC_INT_STATUS triggers the interrupt (src_id 178).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6698
/* Emit a SWITCH_BUFFER packet (payload 0) on the gfx ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6704
/* Emit a CONTEXT_CONTROL packet whose dw2 bits select which state groups
 * the CP loads for this submission, based on @flags.  Under SR-IOV, CE
 * metadata init is emitted before and DE metadata init after it, using
 * the CSA address matching the submission's VM domain.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta_init(ring,
                        (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time a preamble is
                 * presented, even though no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_de_meta_init(ring,
                        (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}
6742
/* Emit a COPY_DATA packet copying register @reg into the writeback slot
 * reserved for virtualization register reads (adev->virt.reg_val_offs);
 * the caller retrieves the value from that slot.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6758
/* Emit a WRITE_DATA packet that writes @val to register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                  uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
        amdgpu_ring_write(ring, reg); /* destination register */
        amdgpu_ring_write(ring, 0);   /* high half of destination */
        amdgpu_ring_write(ring, val);
}
6768
/* Enable or disable the gfx ring's end-of-pipe timestamp interrupt via
 * CP_INT_CNTL_RING0.
 */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6775
6776 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6777                                                      int me, int pipe,
6778                                                      enum amdgpu_interrupt_state state)
6779 {
6780         /*
6781          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6782          * handles the setting of interrupts for this specific pipe. All other
6783          * pipes' interrupts are set by amdkfd.
6784          */
6785
6786         if (me == 1) {
6787                 switch (pipe) {
6788                 case 0:
6789                         break;
6790                 default:
6791                         DRM_DEBUG("invalid pipe %d\n", pipe);
6792                         return;
6793                 }
6794         } else {
6795                 DRM_DEBUG("invalid me %d\n", me);
6796                 return;
6797         }
6798
6799         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6800                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6801 }
6802
/* Enable or disable the privileged-register-access fault interrupt.
 * Always returns 0.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *source,
                                             unsigned type,
                                             enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6813
/* Enable or disable the privileged-instruction fault interrupt.
 * Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              unsigned type,
                                              enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6824
/* Route an EOP interrupt enable/disable request to the right ring: the
 * gfx ring or one of the MEC1/MEC2 compute pipes.  Unknown types are
 * ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
        switch (type) {
        case AMDGPU_CP_IRQ_GFX_EOP:
                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
                break;
        default:
                break;
        }
        return 0;
}
6863
6864 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6865                             struct amdgpu_irq_src *source,
6866                             struct amdgpu_iv_entry *entry)
6867 {
6868         int i;
6869         u8 me_id, pipe_id, queue_id;
6870         struct amdgpu_ring *ring;
6871
6872         DRM_DEBUG("IH: CP EOP\n");
6873         me_id = (entry->ring_id & 0x0c) >> 2;
6874         pipe_id = (entry->ring_id & 0x03) >> 0;
6875         queue_id = (entry->ring_id & 0x70) >> 4;
6876
6877         switch (me_id) {
6878         case 0:
6879                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6880                 break;
6881         case 1:
6882         case 2:
6883                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6884                         ring = &adev->gfx.compute_ring[i];
6885                         /* Per-queue interrupt is supported for MEC starting from VI.
6886                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6887                           */
6888                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6889                                 amdgpu_fence_process(ring);
6890                 }
6891                 break;
6892         }
6893         return 0;
6894 }
6895
/* IRQ handler for privileged register faults: the command stream touched
 * a register it may not access.  Logs the error and schedules the GPU
 * reset work.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6904
/* Handle an illegal-instruction fault raised by the CP: log it and queue a
 * GPU reset (same recovery path as the privileged-register fault above).
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6913
6914 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6915                                             struct amdgpu_irq_src *src,
6916                                             unsigned int type,
6917                                             enum amdgpu_interrupt_state state)
6918 {
6919         uint32_t tmp, target;
6920         struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
6921
6922         BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6923
6924         if (ring->me == 1)
6925                 target = mmCP_ME1_PIPE0_INT_CNTL;
6926         else
6927                 target = mmCP_ME2_PIPE0_INT_CNTL;
6928         target += ring->pipe;
6929
6930         switch (type) {
6931         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6932                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6933                         tmp = RREG32(mmCPC_INT_CNTL);
6934                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6935                                                  GENERIC2_INT_ENABLE, 0);
6936                         WREG32(mmCPC_INT_CNTL, tmp);
6937
6938                         tmp = RREG32(target);
6939                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6940                                                  GENERIC2_INT_ENABLE, 0);
6941                         WREG32(target, tmp);
6942                 } else {
6943                         tmp = RREG32(mmCPC_INT_CNTL);
6944                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6945                                                  GENERIC2_INT_ENABLE, 1);
6946                         WREG32(mmCPC_INT_CNTL, tmp);
6947
6948                         tmp = RREG32(target);
6949                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6950                                                  GENERIC2_INT_ENABLE, 1);
6951                         WREG32(target, tmp);
6952                 }
6953                 break;
6954         default:
6955                 BUG(); /* kiq only support GENERIC2_INT now */
6956                 break;
6957         }
6958         return 0;
6959 }
6960
6961 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6962                             struct amdgpu_irq_src *source,
6963                             struct amdgpu_iv_entry *entry)
6964 {
6965         u8 me_id, pipe_id, queue_id;
6966         struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
6967
6968         BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6969
6970         me_id = (entry->ring_id & 0x0c) >> 2;
6971         pipe_id = (entry->ring_id & 0x03) >> 0;
6972         queue_id = (entry->ring_id & 0x70) >> 4;
6973         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6974                    me_id, pipe_id, queue_id);
6975
6976         amdgpu_fence_process(ring);
6977         return 0;
6978 }
6979
/* IP-block lifecycle callbacks (init/fini, suspend/resume, reset and
 * clock/power gating hooks) for the GFX v8 block.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7000
/* Ring callbacks for the graphics (GFX) CP ring.  emit_frame_size is the
 * worst-case dword count reserved per frame; each term is annotated with
 * the emit helper it accounts for.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
                2 + /* gfx_v8_ring_emit_sb */
                3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
7032
/* Ring callbacks for the compute (MEC) CP rings.  Shares most emit helpers
 * with the gfx ring but uses compute-specific wptr and fence emission.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
};
7060
/* Ring callbacks for the kernel interface queue (KIQ).  Unlike the other
 * ring tables this one provides emit_rreg/emit_wreg so register access can
 * be routed through the KIQ, and has no vm_flush/pipeline_sync hooks.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7087
7088 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7089 {
7090         int i;
7091
7092         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7093
7094         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7095                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7096
7097         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7098                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7099 }
7100
/* CP end-of-pipe interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};
7105
/* Privileged-register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};
7110
/* Illegal-instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};
7115
/* KIQ GENERIC2 interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};
7120
/* Register the GFX interrupt sources (EOP, priv faults, KIQ) with their
 * handler tables and type counts.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        /* One EOP interrupt type per CP ring (gfx plus all compute queues). */
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        /* The privileged fault sources each have a single type. */
        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7135
/* Install the RLC helper table; the iceland table is used unconditionally
 * here (i.e. shared by every chip this code path serves).
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7140
/* Initialize the default GDS (global data share) partitioning: total sizes
 * come from hardware/fixed values, partition sizes depend on the pool size.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init asic gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        /* Chips reporting a 64KB GDS pool get one partition layout,
         * everything else gets the alternate layout below. */
        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
7168
7169 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7170                                                  u32 bitmap)
7171 {
7172         u32 data;
7173
7174         if (!bitmap)
7175                 return;
7176
7177         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7178         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7179
7180         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7181 }
7182
7183 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7184 {
7185         u32 data, mask;
7186
7187         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7188                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7189
7190         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7191
7192         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7193 }
7194
/*
 * gfx_v8_0_get_cu_info - build the active-CU table for this chip
 *
 * Walks every shader engine / shader array, applies any user-requested CU
 * disable masks, and records per-SA active-CU bitmaps plus a total active
 * count and an accumulated "ao" CU mask in adev->gfx.cu_info.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        /* Parsed disable masks for up to 4 SEs x 2 SHs. */
        unsigned disable_masks[4 * 2];

        memset(cu_info, 0, sizeof(*cu_info));

        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        /* GRBM index selection is a global device state; hold the mutex
         * for the whole walk so other users cannot change the selection. */
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        /* Only the first 4x2 SE/SH pairs have parsed masks. */
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* Count active CUs; the first two active CUs per SA
                         * are folded into ao_bitmap ("always on" mask --
                         * presumably for power gating; confirm upstream). */
                        for (k = 0; k < 16; k ++) {
                                if (bitmap & mask) {
                                        if (counter < 2)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                }
        }
        /* Restore broadcast (all SE/SH) selection before unlocking. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
}
7237
/* Exported IP-block descriptor for GFX version 8.0. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7246
/* Exported IP-block descriptor for GFX version 8.1 (shares the 8.0 hooks). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7255
7256 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7257 {
7258         uint64_t ce_payload_addr;
7259         int cnt_ce;
7260         static union {
7261                 struct amdgpu_ce_ib_state regular;
7262                 struct amdgpu_ce_ib_state_chained_ib chained;
7263         } ce_payload = {0};
7264
7265         if (ring->adev->virt.chained_ib_support) {
7266                 ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
7267                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7268         } else {
7269                 ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
7270                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7271         }
7272
7273         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7274         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7275                                 WRITE_DATA_DST_SEL(8) |
7276                                 WR_CONFIRM) |
7277                                 WRITE_DATA_CACHE_POLICY(0));
7278         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7279         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7280         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7281 }
7282
7283 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7284 {
7285         uint64_t de_payload_addr, gds_addr;
7286         int cnt_de;
7287         static union {
7288                 struct amdgpu_de_ib_state regular;
7289                 struct amdgpu_de_ib_state_chained_ib chained;
7290         } de_payload = {0};
7291
7292         gds_addr = csa_addr + 4096;
7293         if (ring->adev->virt.chained_ib_support) {
7294                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7295                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7296                 de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload);
7297                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7298         } else {
7299                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7300                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7301                 de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, de_payload);
7302                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7303         }
7304
7305         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7306         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7307                                 WRITE_DATA_DST_SEL(8) |
7308                                 WR_CONFIRM) |
7309                                 WRITE_DATA_CACHE_POLICY(0));
7310         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7311         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7312         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7313 }