]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drm/amdgpu: Initialize pipe priority order on graphic initialization
[karo-tx-linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
53
/* Ring topology for GFX8: one graphics ring and eight compute (MEC) rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC "golden" values for mmGB_ADDR_CONFIG (see the *_golden_common_all
 * tables below, which program the same values).
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODE* / GB_MACROTILE_MODE* register values:
 * each shifts a raw field value into its register position.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block override bits for mmRLC_CGTT_MGCG_OVERRIDE (clock-gating
 * override control).
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear command codes */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* NOTE(review): appears to be the entry count of the RLC ucode
 * "direct register list" format — confirm against the RLC firmware
 * interface documentation.
 */
#define RLC_FormatDirectRegListLength        14
/* Firmware images requested at runtime per VI-family ASIC:
 * CE, PFP, ME, MEC (and MEC2 where the part has a second MEC), and RLC.
 * Note: stoney and topaz have no mec2 image.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
/* GDS register offsets indexed by VMID (0-15): per-VMID BASE, SIZE,
 * GWS and OA registers.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
168
/* Tonga A11 golden register fixups. Entries are {register, AND mask,
 * OR value} triples, applied via amdgpu_program_register_sequence()
 * (see gfx_v8_0_init_golden_registers()). Values come from the
 * hardware team; do not alter.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
188
/* Tonga common golden settings (all revisions): raster config,
 * GB_ADDR_CONFIG (TONGA_GB_ADDR_CONFIG_GOLDEN) and SPI CU reservations.
 * {register, AND mask, OR value} triples.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
200
/* Tonga MGCG/CGCG (medium/coarse-grain clock gating) init sequence.
 * {register, AND mask, OR value} triples. The GRBM_GFX_INDEX writes
 * of 0xe0000000 broadcast to all SEs/SHs/instances before the
 * per-block CGTT/CGTS writes.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU clock-gating pairing registers, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
279
/* Polaris11 A11 golden register fixups.
 * {register, AND mask, OR value} triples from the hardware team.
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
300
/* Polaris11 common golden settings: GB_ADDR_CONFIG
 * (POLARIS11_GB_ADDR_CONFIG_GOLDEN) and SPI CU reservations.
 * {register, AND mask, OR value} triples.
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
310
/* Polaris10 A11 golden register fixups.
 * {register, AND mask, OR value} triples from the hardware team.
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
331
/* Polaris10 common golden settings: raster config, GB_ADDR_CONFIG
 * and SPI CU reservations. {register, AND mask, OR value} triples.
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
343
/* Fiji common golden settings: raster config, GB_ADDR_CONFIG,
 * SPI CU reservations and SPI_CONFIG_CNTL_1.
 * {register, AND mask, OR value} triples.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
357
/* Fiji A10 golden register fixups.
 * {register, AND mask, OR value} triples from the hardware team.
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
372
/* Fiji MGCG/CGCG clock-gating init sequence. Same structure as the
 * tonga table but without the per-CU CGTS entries.
 * {register, AND mask, OR value} triples.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
411
/* Iceland (Topaz) A11 golden register fixups.
 * {register, AND mask, OR value} triples from the hardware team.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
431
/* Iceland common golden settings: raster config, GB_ADDR_CONFIG
 * (TOPAZ_GB_ADDR_CONFIG_GOLDEN) and SPI CU reservations.
 * {register, AND mask, OR value} triples.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
443
/* Iceland MGCG/CGCG clock-gating init sequence. Per-CU CGTS entries
 * cover CU0..CU5 only (smaller part than tonga/cz).
 * {register, AND mask, OR value} triples.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
511
/* Carrizo A11 golden register fixups.
 * {register, AND mask, OR value} triples from the hardware team.
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
527
/* Carrizo common golden settings: raster config, GB_ADDR_CONFIG
 * (CARRIZO_GB_ADDR_CONFIG_GOLDEN) and SPI CU reservations.
 * {register, AND mask, OR value} triples.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
539
/* Carrizo MGCG/CGCG clock-gating init sequence. Per-CU CGTS entries
 * cover CU0..CU7. Note the final RLC_CGCG_CGLS_CTRL value (0x0020003f)
 * differs from tonga/fiji (0x0020003c).
 * {register, AND mask, OR value} triples.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
618
/* Stoney A11 golden register fixups.
 * {register, AND mask, OR value} triples from the hardware team.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
632
/* Stoney common golden registers (raster config, GB addressing, SPI CU
 * resource reservations): (register offset, mask, value) triplets applied
 * via amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney MGCG/CGCG (clock gating) init values: (register offset, mask,
 * value) triplets applied via amdgpu_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
/* Forward declarations -- these helpers are defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
662
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the ASIC-specific MGCG/CGCG init, golden settings and common
 * register tables (defined above) through
 * amdgpu_program_register_sequence().  Unknown ASIC types are silently
 * ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk: a few Polaris10 SKUs (matched by PCI
		 * revision + subsystem IDs) additionally get two I2C
		 * transactions via atombios.  NOTE(review): presumably
		 * reprogramming an external component on those boards --
		 * confirm against the board/VBIOS notes.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
751
752 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
753 {
754         adev->gfx.scratch.num_reg = 7;
755         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
756         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
757 }
758
759 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
760 {
761         struct amdgpu_device *adev = ring->adev;
762         uint32_t scratch;
763         uint32_t tmp = 0;
764         unsigned i;
765         int r;
766
767         r = amdgpu_gfx_scratch_get(adev, &scratch);
768         if (r) {
769                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
770                 return r;
771         }
772         WREG32(scratch, 0xCAFEDEAD);
773         r = amdgpu_ring_alloc(ring, 3);
774         if (r) {
775                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
776                           ring->idx, r);
777                 amdgpu_gfx_scratch_free(adev, scratch);
778                 return r;
779         }
780         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
781         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
782         amdgpu_ring_write(ring, 0xDEADBEEF);
783         amdgpu_ring_commit(ring);
784
785         for (i = 0; i < adev->usec_timeout; i++) {
786                 tmp = RREG32(scratch);
787                 if (tmp == 0xDEADBEEF)
788                         break;
789                 DRM_UDELAY(1);
790         }
791         if (i < adev->usec_timeout) {
792                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
793                          ring->idx, i);
794         } else {
795                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
796                           ring->idx, scratch, tmp);
797                 r = -EINVAL;
798         }
799         amdgpu_gfx_scratch_free(adev, scratch);
800         return r;
801 }
802
/*
 * gfx_v8_0_ring_test_ib - test an indirect buffer round-trip
 *
 * Schedules a 3-dword IB that writes a magic value into a scratch register,
 * waits (up to @timeout jiffies) on the returned fence, then checks that the
 * scratch register was updated.  Returns 0 on success, -ETIMEDOUT if the
 * fence never signalled, or another negative errno on failure.  All resources
 * (IB, fence, scratch register) are released on every path via the goto
 * cleanup chain.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* Sentinel: distinguishes "IB ran" from "nothing happened". */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* Single SET_UCONFIG_REG packet writing the magic into the scratch reg. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		/* 0 from dma_fence_wait_timeout means the wait timed out. */
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
858
859
860 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
861         release_firmware(adev->gfx.pfp_fw);
862         adev->gfx.pfp_fw = NULL;
863         release_firmware(adev->gfx.me_fw);
864         adev->gfx.me_fw = NULL;
865         release_firmware(adev->gfx.ce_fw);
866         adev->gfx.ce_fw = NULL;
867         release_firmware(adev->gfx.rlc_fw);
868         adev->gfx.rlc_fw = NULL;
869         release_firmware(adev->gfx.mec_fw);
870         adev->gfx.mec_fw = NULL;
871         if ((adev->asic_type != CHIP_STONEY) &&
872             (adev->asic_type != CHIP_TOPAZ))
873                 release_firmware(adev->gfx.mec2_fw);
874         adev->gfx.mec2_fw = NULL;
875
876         kfree(adev->gfx.rlc.register_list_format);
877 }
878
879 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
880 {
881         const char *chip_name;
882         char fw_name[30];
883         int err;
884         struct amdgpu_firmware_info *info = NULL;
885         const struct common_firmware_header *header = NULL;
886         const struct gfx_firmware_header_v1_0 *cp_hdr;
887         const struct rlc_firmware_header_v2_0 *rlc_hdr;
888         unsigned int *tmp = NULL, i;
889
890         DRM_DEBUG("\n");
891
892         switch (adev->asic_type) {
893         case CHIP_TOPAZ:
894                 chip_name = "topaz";
895                 break;
896         case CHIP_TONGA:
897                 chip_name = "tonga";
898                 break;
899         case CHIP_CARRIZO:
900                 chip_name = "carrizo";
901                 break;
902         case CHIP_FIJI:
903                 chip_name = "fiji";
904                 break;
905         case CHIP_POLARIS11:
906                 chip_name = "polaris11";
907                 break;
908         case CHIP_POLARIS10:
909                 chip_name = "polaris10";
910                 break;
911         case CHIP_POLARIS12:
912                 chip_name = "polaris12";
913                 break;
914         case CHIP_STONEY:
915                 chip_name = "stoney";
916                 break;
917         default:
918                 BUG();
919         }
920
921         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
922         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
923         if (err)
924                 goto out;
925         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
926         if (err)
927                 goto out;
928         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
929         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931
932         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
933         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
934         if (err)
935                 goto out;
936         err = amdgpu_ucode_validate(adev->gfx.me_fw);
937         if (err)
938                 goto out;
939         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
940         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
941
942         /* chain ib ucode isn't formal released, just disable it by far
943          * TODO: when ucod ready we should use ucode version to judge if
944          * chain-ib support or not.
945          */
946         adev->virt.chained_ib_support = false;
947
948         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
949
950         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
951         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
952         if (err)
953                 goto out;
954         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
955         if (err)
956                 goto out;
957         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
958         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
959         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
960
961         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
962         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
963         if (err)
964                 goto out;
965         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
966         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
967         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
968         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
969
970         adev->gfx.rlc.save_and_restore_offset =
971                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
972         adev->gfx.rlc.clear_state_descriptor_offset =
973                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
974         adev->gfx.rlc.avail_scratch_ram_locations =
975                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
976         adev->gfx.rlc.reg_restore_list_size =
977                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
978         adev->gfx.rlc.reg_list_format_start =
979                         le32_to_cpu(rlc_hdr->reg_list_format_start);
980         adev->gfx.rlc.reg_list_format_separate_start =
981                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
982         adev->gfx.rlc.starting_offsets_start =
983                         le32_to_cpu(rlc_hdr->starting_offsets_start);
984         adev->gfx.rlc.reg_list_format_size_bytes =
985                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
986         adev->gfx.rlc.reg_list_size_bytes =
987                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
988
989         adev->gfx.rlc.register_list_format =
990                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
991                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
992
993         if (!adev->gfx.rlc.register_list_format) {
994                 err = -ENOMEM;
995                 goto out;
996         }
997
998         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
999                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1000         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1001                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1002
1003         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1004
1005         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1006                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1007         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1008                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1009
1010         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1011         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1012         if (err)
1013                 goto out;
1014         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1015         if (err)
1016                 goto out;
1017         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1018         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1019         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1020
1021         if ((adev->asic_type != CHIP_STONEY) &&
1022             (adev->asic_type != CHIP_TOPAZ)) {
1023                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1024                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1025                 if (!err) {
1026                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1027                         if (err)
1028                                 goto out;
1029                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1030                                 adev->gfx.mec2_fw->data;
1031                         adev->gfx.mec2_fw_version =
1032                                 le32_to_cpu(cp_hdr->header.ucode_version);
1033                         adev->gfx.mec2_feature_version =
1034                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1035                 } else {
1036                         err = 0;
1037                         adev->gfx.mec2_fw = NULL;
1038                 }
1039         }
1040
1041         if (adev->firmware.smu_load) {
1042                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1043                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1044                 info->fw = adev->gfx.pfp_fw;
1045                 header = (const struct common_firmware_header *)info->fw->data;
1046                 adev->firmware.fw_size +=
1047                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1048
1049                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1050                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1051                 info->fw = adev->gfx.me_fw;
1052                 header = (const struct common_firmware_header *)info->fw->data;
1053                 adev->firmware.fw_size +=
1054                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1055
1056                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1057                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1058                 info->fw = adev->gfx.ce_fw;
1059                 header = (const struct common_firmware_header *)info->fw->data;
1060                 adev->firmware.fw_size +=
1061                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1062
1063                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1064                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1065                 info->fw = adev->gfx.rlc_fw;
1066                 header = (const struct common_firmware_header *)info->fw->data;
1067                 adev->firmware.fw_size +=
1068                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1069
1070                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1071                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1072                 info->fw = adev->gfx.mec_fw;
1073                 header = (const struct common_firmware_header *)info->fw->data;
1074                 adev->firmware.fw_size +=
1075                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1076
1077                 /* we need account JT in */
1078                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1079                 adev->firmware.fw_size +=
1080                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1081
1082                 if (amdgpu_sriov_vf(adev)) {
1083                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1084                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1085                         info->fw = adev->gfx.mec_fw;
1086                         adev->firmware.fw_size +=
1087                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1088                 }
1089
1090                 if (adev->gfx.mec2_fw) {
1091                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1092                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1093                         info->fw = adev->gfx.mec2_fw;
1094                         header = (const struct common_firmware_header *)info->fw->data;
1095                         adev->firmware.fw_size +=
1096                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1097                 }
1098
1099         }
1100
1101 out:
1102         if (err) {
1103                 dev_err(adev->dev,
1104                         "gfx8: Failed to load firmware \"%s\"\n",
1105                         fw_name);
1106                 release_firmware(adev->gfx.pfp_fw);
1107                 adev->gfx.pfp_fw = NULL;
1108                 release_firmware(adev->gfx.me_fw);
1109                 adev->gfx.me_fw = NULL;
1110                 release_firmware(adev->gfx.ce_fw);
1111                 adev->gfx.ce_fw = NULL;
1112                 release_firmware(adev->gfx.rlc_fw);
1113                 adev->gfx.rlc_fw = NULL;
1114                 release_firmware(adev->gfx.mec_fw);
1115                 adev->gfx.mec_fw = NULL;
1116                 release_firmware(adev->gfx.mec2_fw);
1117                 adev->gfx.mec2_fw = NULL;
1118         }
1119         return err;
1120 }
1121
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state indirect buffer
 *
 * Emits, into @buffer (little-endian dwords), the PM4 packet stream that
 * programs the hardware's default context state: a PREAMBLE begin marker,
 * a CONTEXT_CONTROL packet, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the raster config pair, a PREAMBLE end marker and
 * a final CLEAR_STATE packet.  Does nothing if cs_data or @buffer is NULL.
 * The caller must size @buffer via gfx_v8_0_get_csb_size().
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* Walk every extent of every section; only SECT_CONTEXT sections
	 * belong in the clear-state buffer -- anything else aborts.
	 */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* Raster config for SE0/SH0, captured at gfx init time. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1168
/*
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC cp_table BO
 *
 * Concatenates the jump tables of the CE, PFP, ME, MEC (and, on Carrizo
 * only, MEC2) firmware images into adev->gfx.rlc.cp_table_ptr, back to
 * back.  Each table's offset/size come from that blob's
 * gfx_firmware_header_v1_0.  The caller must have mapped cp_table_ptr and
 * loaded all the firmware blobs first.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Only Carrizo has a MEC2 jump table to append. */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* me indexes the firmware blobs in a fixed order:
		 * 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, 4 = MEC2.
		 * Every me in [0, max_me) takes one of the branches below,
		 * so fw_data/table_offset/table_size are always assigned.
		 */
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* Copy dword-by-dword: read LE firmware data, store LE into
		 * the (write-combined) BO mapping.
		 */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1233
1234 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1235 {
1236         int r;
1237
1238         /* clear state block */
1239         if (adev->gfx.rlc.clear_state_obj) {
1240                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1241                 if (unlikely(r != 0))
1242                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1243                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1244                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1245                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1246                 adev->gfx.rlc.clear_state_obj = NULL;
1247         }
1248
1249         /* jump table block */
1250         if (adev->gfx.rlc.cp_table_obj) {
1251                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1252                 if (unlikely(r != 0))
1253                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1254                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1255                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1256                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1257                 adev->gfx.rlc.cp_table_obj = NULL;
1258         }
1259 }
1260
1261 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1262 {
1263         volatile u32 *dst_ptr;
1264         u32 dws;
1265         const struct cs_section_def *cs_data;
1266         int r;
1267
1268         adev->gfx.rlc.cs_data = vi_cs_data;
1269
1270         cs_data = adev->gfx.rlc.cs_data;
1271
1272         if (cs_data) {
1273                 /* clear state block */
1274                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1275
1276                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1277                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1278                                              AMDGPU_GEM_DOMAIN_VRAM,
1279                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1280                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1281                                              NULL, NULL,
1282                                              &adev->gfx.rlc.clear_state_obj);
1283                         if (r) {
1284                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1285                                 gfx_v8_0_rlc_fini(adev);
1286                                 return r;
1287                         }
1288                 }
1289                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1290                 if (unlikely(r != 0)) {
1291                         gfx_v8_0_rlc_fini(adev);
1292                         return r;
1293                 }
1294                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1295                                   &adev->gfx.rlc.clear_state_gpu_addr);
1296                 if (r) {
1297                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1298                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1299                         gfx_v8_0_rlc_fini(adev);
1300                         return r;
1301                 }
1302
1303                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1304                 if (r) {
1305                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1306                         gfx_v8_0_rlc_fini(adev);
1307                         return r;
1308                 }
1309                 /* set up the cs buffer */
1310                 dst_ptr = adev->gfx.rlc.cs_ptr;
1311                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1312                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1313                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1314         }
1315
1316         if ((adev->asic_type == CHIP_CARRIZO) ||
1317             (adev->asic_type == CHIP_STONEY)) {
1318                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1319                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1320                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1321                                              AMDGPU_GEM_DOMAIN_VRAM,
1322                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1323                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1324                                              NULL, NULL,
1325                                              &adev->gfx.rlc.cp_table_obj);
1326                         if (r) {
1327                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1328                                 return r;
1329                         }
1330                 }
1331
1332                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1333                 if (unlikely(r != 0)) {
1334                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1335                         return r;
1336                 }
1337                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1338                                   &adev->gfx.rlc.cp_table_gpu_addr);
1339                 if (r) {
1340                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1341                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1342                         return r;
1343                 }
1344                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1345                 if (r) {
1346                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1347                         return r;
1348                 }
1349
1350                 cz_init_cp_jump_table(adev);
1351
1352                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1353                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1354         }
1355
1356         return 0;
1357 }
1358
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object.
 *
 * Reverses gfx_v8_0_mec_init(): unpins and unrefs the hardware queue
 * descriptor / end-of-pipe buffer.  No-op if it was never allocated.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		/* continue tearing down even on reserve failure to avoid
		 * leaking the BO on driver unload */
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}
1373
1374 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1375                                   struct amdgpu_ring *ring,
1376                                   struct amdgpu_irq_src *irq)
1377 {
1378         int r = 0;
1379
1380         if (amdgpu_sriov_vf(adev)) {
1381                 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1382                 if (r)
1383                         return r;
1384         }
1385
1386         ring->adev = NULL;
1387         ring->ring_obj = NULL;
1388         ring->use_doorbell = true;
1389         ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1390         if (adev->gfx.mec2_fw) {
1391                 ring->me = 2;
1392                 ring->pipe = 0;
1393         } else {
1394                 ring->me = 1;
1395                 ring->pipe = 1;
1396         }
1397
1398         irq->data = ring;
1399         ring->queue = 0;
1400         sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1401         r = amdgpu_ring_init(adev, ring, 1024,
1402                              irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1403         if (r)
1404                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1405
1406         return r;
1407 }
1408
1409 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1410                                    struct amdgpu_irq_src *irq)
1411 {
1412         if (amdgpu_sriov_vf(ring->adev))
1413                 amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1414
1415         amdgpu_ring_fini(ring);
1416         irq->data = NULL;
1417 }
1418
1419 #define MEC_HPD_SIZE 2048
1420
1421 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1422 {
1423         int r;
1424         u32 *hpd;
1425
1426         /*
1427          * we assign only 1 pipe because all other pipes will
1428          * be handled by KFD
1429          */
1430         adev->gfx.mec.num_mec = 1;
1431         adev->gfx.mec.num_pipe = 1;
1432         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1433
1434         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1435                 r = amdgpu_bo_create(adev,
1436                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1437                                      PAGE_SIZE, true,
1438                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1439                                      &adev->gfx.mec.hpd_eop_obj);
1440                 if (r) {
1441                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1442                         return r;
1443                 }
1444         }
1445
1446         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1447         if (unlikely(r != 0)) {
1448                 gfx_v8_0_mec_fini(adev);
1449                 return r;
1450         }
1451         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1452                           &adev->gfx.mec.hpd_eop_gpu_addr);
1453         if (r) {
1454                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1455                 gfx_v8_0_mec_fini(adev);
1456                 return r;
1457         }
1458         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1459         if (r) {
1460                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1461                 gfx_v8_0_mec_fini(adev);
1462                 return r;
1463         }
1464
1465         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1466
1467         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1468         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1469
1470         return 0;
1471 }
1472
1473 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1474 {
1475         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1476
1477         amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1478         kiq->eop_obj = NULL;
1479 }
1480
1481 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1482 {
1483         int r;
1484         u32 *hpd;
1485         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1486
1487         r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1488                                     AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1489                                     &kiq->eop_gpu_addr, (void **)&hpd);
1490         if (r) {
1491                 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1492                 return r;
1493         }
1494
1495         memset(hpd, 0, MEC_HPD_SIZE);
1496
1497         amdgpu_bo_kunmap(kiq->eop_obj);
1498
1499         return 0;
1500 }
1501
/* Raw GCN shader binary used by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize the VGPR file; it is copied verbatim into the workaround IB
 * and dispatched via DISPATCH_DIRECT.  NOTE(review): assumed to be a
 * v_mov_b32 sequence writing every VGPR — binary not decoded here. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1538
/* Raw GCN shader binary used by gfx_v8_0_do_edc_gpr_workarounds() to
 * initialize the SGPR file; dispatched twice (with sgpr1_init_regs and
 * sgpr2_init_regs) so different CU subsets run it.  NOTE(review):
 * assumed to be an s_mov sequence over the SGPRs — binary not decoded. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1563
/* (register, value) pairs written through PACKET3_SET_SH_REG before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds(): enable all
 * CUs (thread-mgmt mask 0xffffffff), a 1024x1x1 thread group, and seed
 * the USER_DATA SGPRs with recognizable 0xedcedcNN markers. */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1583
/* (register, value) pairs for the first SGPR-init dispatch: CU mask
 * 0x0f targets one half of the CUs (sgpr2_init_regs covers the other
 * half with 0xf0); otherwise mirrors vgpr_init_regs. */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1603
/* (register, value) pairs for the second SGPR-init dispatch: CU mask
 * 0xf0 covers the CUs that sgpr1_init_regs (mask 0x0f) did not. */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1623
/* EDC SEC/DED error-counter registers (CP, GDS, SPI, SQ, TC, TD blocks)
 * read back at the end of gfx_v8_0_do_edc_gpr_workarounds() to clear
 * any counts accumulated by the scrub dispatches. */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1652
1653 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1654 {
1655         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1656         struct amdgpu_ib ib;
1657         struct dma_fence *f = NULL;
1658         int r, i;
1659         u32 tmp;
1660         unsigned total_size, vgpr_offset, sgpr_offset;
1661         u64 gpu_addr;
1662
1663         /* only supported on CZ */
1664         if (adev->asic_type != CHIP_CARRIZO)
1665                 return 0;
1666
1667         /* bail if the compute ring is not ready */
1668         if (!ring->ready)
1669                 return 0;
1670
1671         tmp = RREG32(mmGB_EDC_MODE);
1672         WREG32(mmGB_EDC_MODE, 0);
1673
1674         total_size =
1675                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1676         total_size +=
1677                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1678         total_size +=
1679                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1680         total_size = ALIGN(total_size, 256);
1681         vgpr_offset = total_size;
1682         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1683         sgpr_offset = total_size;
1684         total_size += sizeof(sgpr_init_compute_shader);
1685
1686         /* allocate an indirect buffer to put the commands in */
1687         memset(&ib, 0, sizeof(ib));
1688         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1689         if (r) {
1690                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1691                 return r;
1692         }
1693
1694         /* load the compute shaders */
1695         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1696                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1697
1698         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1699                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1700
1701         /* init the ib length to 0 */
1702         ib.length_dw = 0;
1703
1704         /* VGPR */
1705         /* write the register state for the compute dispatch */
1706         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1707                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1708                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1709                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1710         }
1711         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1712         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1713         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1714         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1715         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1716         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1717
1718         /* write dispatch packet */
1719         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1720         ib.ptr[ib.length_dw++] = 8; /* x */
1721         ib.ptr[ib.length_dw++] = 1; /* y */
1722         ib.ptr[ib.length_dw++] = 1; /* z */
1723         ib.ptr[ib.length_dw++] =
1724                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1725
1726         /* write CS partial flush packet */
1727         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1728         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1729
1730         /* SGPR1 */
1731         /* write the register state for the compute dispatch */
1732         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1733                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1734                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1735                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1736         }
1737         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1738         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1739         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1740         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1741         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1742         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1743
1744         /* write dispatch packet */
1745         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1746         ib.ptr[ib.length_dw++] = 8; /* x */
1747         ib.ptr[ib.length_dw++] = 1; /* y */
1748         ib.ptr[ib.length_dw++] = 1; /* z */
1749         ib.ptr[ib.length_dw++] =
1750                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1751
1752         /* write CS partial flush packet */
1753         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1754         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1755
1756         /* SGPR2 */
1757         /* write the register state for the compute dispatch */
1758         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1759                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1760                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1761                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1762         }
1763         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1764         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1765         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1766         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1767         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1768         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1769
1770         /* write dispatch packet */
1771         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1772         ib.ptr[ib.length_dw++] = 8; /* x */
1773         ib.ptr[ib.length_dw++] = 1; /* y */
1774         ib.ptr[ib.length_dw++] = 1; /* z */
1775         ib.ptr[ib.length_dw++] =
1776                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1777
1778         /* write CS partial flush packet */
1779         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1780         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1781
1782         /* shedule the ib on the ring */
1783         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1784         if (r) {
1785                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1786                 goto fail;
1787         }
1788
1789         /* wait for the GPU to finish processing the IB */
1790         r = dma_fence_wait(f, false);
1791         if (r) {
1792                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1793                 goto fail;
1794         }
1795
1796         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1797         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1798         WREG32(mmGB_EDC_MODE, tmp);
1799
1800         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1801         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1802         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1803
1804
1805         /* read back registers to clear the counters */
1806         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1807                 RREG32(sec_ded_counter_registers[i]);
1808
1809 fail:
1810         amdgpu_ib_free(adev, &ib, NULL);
1811         dma_fence_put(f);
1812
1813         return r;
1814 }
1815
1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1817 {
1818         u32 gb_addr_config;
1819         u32 mc_shared_chmap, mc_arb_ramcfg;
1820         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1821         u32 tmp;
1822         int ret;
1823
1824         switch (adev->asic_type) {
1825         case CHIP_TOPAZ:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_cu_per_sh = 6;
1829                 adev->gfx.config.max_sh_per_se = 1;
1830                 adev->gfx.config.max_backends_per_se = 2;
1831                 adev->gfx.config.max_texture_channel_caches = 2;
1832                 adev->gfx.config.max_gprs = 256;
1833                 adev->gfx.config.max_gs_threads = 32;
1834                 adev->gfx.config.max_hw_contexts = 8;
1835
1836                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1841                 break;
1842         case CHIP_FIJI:
1843                 adev->gfx.config.max_shader_engines = 4;
1844                 adev->gfx.config.max_tile_pipes = 16;
1845                 adev->gfx.config.max_cu_per_sh = 16;
1846                 adev->gfx.config.max_sh_per_se = 1;
1847                 adev->gfx.config.max_backends_per_se = 4;
1848                 adev->gfx.config.max_texture_channel_caches = 16;
1849                 adev->gfx.config.max_gprs = 256;
1850                 adev->gfx.config.max_gs_threads = 32;
1851                 adev->gfx.config.max_hw_contexts = 8;
1852
1853                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1858                 break;
1859         case CHIP_POLARIS11:
1860         case CHIP_POLARIS12:
1861                 ret = amdgpu_atombios_get_gfx_info(adev);
1862                 if (ret)
1863                         return ret;
1864                 adev->gfx.config.max_gprs = 256;
1865                 adev->gfx.config.max_gs_threads = 32;
1866                 adev->gfx.config.max_hw_contexts = 8;
1867
1868                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1873                 break;
1874         case CHIP_POLARIS10:
1875                 ret = amdgpu_atombios_get_gfx_info(adev);
1876                 if (ret)
1877                         return ret;
1878                 adev->gfx.config.max_gprs = 256;
1879                 adev->gfx.config.max_gs_threads = 32;
1880                 adev->gfx.config.max_hw_contexts = 8;
1881
1882                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887                 break;
1888         case CHIP_TONGA:
1889                 adev->gfx.config.max_shader_engines = 4;
1890                 adev->gfx.config.max_tile_pipes = 8;
1891                 adev->gfx.config.max_cu_per_sh = 8;
1892                 adev->gfx.config.max_sh_per_se = 1;
1893                 adev->gfx.config.max_backends_per_se = 2;
1894                 adev->gfx.config.max_texture_channel_caches = 8;
1895                 adev->gfx.config.max_gprs = 256;
1896                 adev->gfx.config.max_gs_threads = 32;
1897                 adev->gfx.config.max_hw_contexts = 8;
1898
1899                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1904                 break;
1905         case CHIP_CARRIZO:
1906                 adev->gfx.config.max_shader_engines = 1;
1907                 adev->gfx.config.max_tile_pipes = 2;
1908                 adev->gfx.config.max_sh_per_se = 1;
1909                 adev->gfx.config.max_backends_per_se = 2;
1910
1911                 switch (adev->pdev->revision) {
1912                 case 0xc4:
1913                 case 0x84:
1914                 case 0xc8:
1915                 case 0xcc:
1916                 case 0xe1:
1917                 case 0xe3:
1918                         /* B10 */
1919                         adev->gfx.config.max_cu_per_sh = 8;
1920                         break;
1921                 case 0xc5:
1922                 case 0x81:
1923                 case 0x85:
1924                 case 0xc9:
1925                 case 0xcd:
1926                 case 0xe2:
1927                 case 0xe4:
1928                         /* B8 */
1929                         adev->gfx.config.max_cu_per_sh = 6;
1930                         break;
1931                 case 0xc6:
1932                 case 0xca:
1933                 case 0xce:
1934                 case 0x88:
1935                         /* B6 */
1936                         adev->gfx.config.max_cu_per_sh = 6;
1937                         break;
1938                 case 0xc7:
1939                 case 0x87:
1940                 case 0xcb:
1941                 case 0xe5:
1942                 case 0x89:
1943                 default:
1944                         /* B4 */
1945                         adev->gfx.config.max_cu_per_sh = 4;
1946                         break;
1947                 }
1948
1949                 adev->gfx.config.max_texture_channel_caches = 2;
1950                 adev->gfx.config.max_gprs = 256;
1951                 adev->gfx.config.max_gs_threads = 32;
1952                 adev->gfx.config.max_hw_contexts = 8;
1953
1954                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1955                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1956                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1957                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1958                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1959                 break;
1960         case CHIP_STONEY:
1961                 adev->gfx.config.max_shader_engines = 1;
1962                 adev->gfx.config.max_tile_pipes = 2;
1963                 adev->gfx.config.max_sh_per_se = 1;
1964                 adev->gfx.config.max_backends_per_se = 1;
1965
1966                 switch (adev->pdev->revision) {
1967                 case 0xc0:
1968                 case 0xc1:
1969                 case 0xc2:
1970                 case 0xc4:
1971                 case 0xc8:
1972                 case 0xc9:
1973                         adev->gfx.config.max_cu_per_sh = 3;
1974                         break;
1975                 case 0xd0:
1976                 case 0xd1:
1977                 case 0xd2:
1978                 default:
1979                         adev->gfx.config.max_cu_per_sh = 2;
1980                         break;
1981                 }
1982
1983                 adev->gfx.config.max_texture_channel_caches = 2;
1984                 adev->gfx.config.max_gprs = 256;
1985                 adev->gfx.config.max_gs_threads = 16;
1986                 adev->gfx.config.max_hw_contexts = 8;
1987
1988                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1992                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1993                 break;
1994         default:
1995                 adev->gfx.config.max_shader_engines = 2;
1996                 adev->gfx.config.max_tile_pipes = 4;
1997                 adev->gfx.config.max_cu_per_sh = 2;
1998                 adev->gfx.config.max_sh_per_se = 1;
1999                 adev->gfx.config.max_backends_per_se = 2;
2000                 adev->gfx.config.max_texture_channel_caches = 4;
2001                 adev->gfx.config.max_gprs = 256;
2002                 adev->gfx.config.max_gs_threads = 32;
2003                 adev->gfx.config.max_hw_contexts = 8;
2004
2005                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2006                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2007                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2008                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2009                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2010                 break;
2011         }
2012
2013         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2014         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2015         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2016
2017         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2018         adev->gfx.config.mem_max_burst_length_bytes = 256;
2019         if (adev->flags & AMD_IS_APU) {
2020                 /* Get memory bank mapping mode. */
2021                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2022                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2023                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2024
2025                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2026                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2027                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2028
2029                 /* Validate settings in case only one DIMM installed. */
2030                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2031                         dimm00_addr_map = 0;
2032                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2033                         dimm01_addr_map = 0;
2034                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2035                         dimm10_addr_map = 0;
2036                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2037                         dimm11_addr_map = 0;
2038
2039                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2040                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2041                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2042                         adev->gfx.config.mem_row_size_in_kb = 2;
2043                 else
2044                         adev->gfx.config.mem_row_size_in_kb = 1;
2045         } else {
2046                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2047                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2048                 if (adev->gfx.config.mem_row_size_in_kb > 4)
2049                         adev->gfx.config.mem_row_size_in_kb = 4;
2050         }
2051
2052         adev->gfx.config.shader_engine_tile_size = 32;
2053         adev->gfx.config.num_gpus = 1;
2054         adev->gfx.config.multi_gpu_tile_size = 64;
2055
2056         /* fix up row size */
2057         switch (adev->gfx.config.mem_row_size_in_kb) {
2058         case 1:
2059         default:
2060                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2061                 break;
2062         case 2:
2063                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2064                 break;
2065         case 4:
2066                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2067                 break;
2068         }
2069         adev->gfx.config.gb_addr_config = gb_addr_config;
2070
2071         return 0;
2072 }
2073
/*
 * gfx_v8_0_sw_init() - software-side init hook for the GFX v8 IP block.
 *
 * Performs all one-time software setup: registers the GFX interrupt
 * sources, loads the gfx firmware, allocates the RLC/MEC/KIQ backing
 * buffer objects, initializes the gfx and compute rings, and reserves
 * the GDS/GWS/OA partitions used by gfx.
 *
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *.
 *
 * Returns 0 on success or a negative error code.  On failure the
 * resources allocated so far are left for gfx_v8_0_sw_fini() to release.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/*
	 * Register the four GFX interrupt sources.  The numeric IDs
	 * (178/181/184/185) are the hardware IV source IDs for this ASIC
	 * family — NOTE(review): presumably matching the VI interrupt
	 * vector table; confirm against the irq source headers.
	 */
	/* KIQ event */
	r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* Load the gfx firmware images. */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	/* Allocate the buffer objects backing the RLC, MEC and KIQ. */
	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	r = gfx_v8_0_kiq_init(adev);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	/* The KIQ has its own ring and irq source, set up separately. */
	kiq = &adev->gfx.kiq;
	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		/* Defensive bound — num_compute_rings should already be
		 * capped below this; TODO confirm at the caller. */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring i maps to pipe i/8, queue i%8. */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		/* Each pipe has its own EOP interrupt line. */
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* 32 KiB of CE ram. */
	adev->gfx.ce_ram_size = 0x8000;

	/* Read back the gb_addr/tiling configuration for this ASIC. */
	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2203
2204 static int gfx_v8_0_sw_fini(void *handle)
2205 {
2206         int i;
2207         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2208
2209         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2210         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2211         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2212
2213         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2214                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2215         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2216                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2217         gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2218
2219         gfx_v8_0_kiq_fini(adev);
2220         gfx_v8_0_mec_fini(adev);
2221         gfx_v8_0_rlc_fini(adev);
2222         gfx_v8_0_free_microcode(adev);
2223
2224         return 0;
2225 }
2226
2227 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2228 {
2229         uint32_t *modearray, *mod2array;
2230         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2231         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2232         u32 reg_offset;
2233
2234         modearray = adev->gfx.config.tile_mode_array;
2235         mod2array = adev->gfx.config.macrotile_mode_array;
2236
2237         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2238                 modearray[reg_offset] = 0;
2239
2240         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2241                 mod2array[reg_offset] = 0;
2242
2243         switch (adev->asic_type) {
2244         case CHIP_TOPAZ:
2245                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246                                 PIPE_CONFIG(ADDR_SURF_P2) |
2247                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2248                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2249                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                 PIPE_CONFIG(ADDR_SURF_P2) |
2251                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2252                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2253                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                 PIPE_CONFIG(ADDR_SURF_P2) |
2255                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258                                 PIPE_CONFIG(ADDR_SURF_P2) |
2259                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262                                 PIPE_CONFIG(ADDR_SURF_P2) |
2263                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266                                 PIPE_CONFIG(ADDR_SURF_P2) |
2267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2270                                 PIPE_CONFIG(ADDR_SURF_P2) |
2271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2274                                 PIPE_CONFIG(ADDR_SURF_P2));
2275                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P2) |
2277                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2278                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2279                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                  PIPE_CONFIG(ADDR_SURF_P2) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284                                  PIPE_CONFIG(ADDR_SURF_P2) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2287                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288                                  PIPE_CONFIG(ADDR_SURF_P2) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292                                  PIPE_CONFIG(ADDR_SURF_P2) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2296                                  PIPE_CONFIG(ADDR_SURF_P2) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300                                  PIPE_CONFIG(ADDR_SURF_P2) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2304                                  PIPE_CONFIG(ADDR_SURF_P2) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2308                                  PIPE_CONFIG(ADDR_SURF_P2) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2311                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2312                                  PIPE_CONFIG(ADDR_SURF_P2) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2315                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2316                                  PIPE_CONFIG(ADDR_SURF_P2) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2319                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2320                                  PIPE_CONFIG(ADDR_SURF_P2) |
2321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2323                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2324                                  PIPE_CONFIG(ADDR_SURF_P2) |
2325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2328                                  PIPE_CONFIG(ADDR_SURF_P2) |
2329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2332                                  PIPE_CONFIG(ADDR_SURF_P2) |
2333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336                                  PIPE_CONFIG(ADDR_SURF_P2) |
2337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                  PIPE_CONFIG(ADDR_SURF_P2) |
2341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                  PIPE_CONFIG(ADDR_SURF_P2) |
2345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2347
2348                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2351                                 NUM_BANKS(ADDR_SURF_8_BANK));
2352                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2355                                 NUM_BANKS(ADDR_SURF_8_BANK));
2356                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2357                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2358                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2359                                 NUM_BANKS(ADDR_SURF_8_BANK));
2360                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2361                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2362                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2363                                 NUM_BANKS(ADDR_SURF_8_BANK));
2364                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367                                 NUM_BANKS(ADDR_SURF_8_BANK));
2368                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371                                 NUM_BANKS(ADDR_SURF_8_BANK));
2372                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                                 NUM_BANKS(ADDR_SURF_8_BANK));
2376                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2379                                 NUM_BANKS(ADDR_SURF_16_BANK));
2380                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2383                                 NUM_BANKS(ADDR_SURF_16_BANK));
2384                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2385                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2387                                  NUM_BANKS(ADDR_SURF_16_BANK));
2388                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2389                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2390                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2391                                  NUM_BANKS(ADDR_SURF_16_BANK));
2392                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2395                                  NUM_BANKS(ADDR_SURF_16_BANK));
2396                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2399                                  NUM_BANKS(ADDR_SURF_16_BANK));
2400                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2403                                  NUM_BANKS(ADDR_SURF_8_BANK));
2404
2405                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2406                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2407                             reg_offset != 23)
2408                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2409
2410                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2411                         if (reg_offset != 7)
2412                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2413
2414                 break;
2415         case CHIP_FIJI:
2416                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2419                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2423                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2431                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2435                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2436                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2437                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2439                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2440                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2443                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2444                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2447                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2448                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2449                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2450                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2453                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2457                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2461                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2464                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2465                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2467                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2470                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2472                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2474                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2475                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2476                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2477                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2478                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2479                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2480                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2482                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2484                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2486                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2487                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2488                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2491                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2492                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2496                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2497                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2499                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2500                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2503                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2504                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2507                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2508                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2509                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2510                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2511                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2512                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2513                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2514                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2515                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2516                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2517                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2518                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2519                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2520                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2521                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2522                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2524                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2525                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2528                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2529                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2531                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2532                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2533                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2534                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2536                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2537                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2538
2539                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542                                 NUM_BANKS(ADDR_SURF_8_BANK));
2543                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2546                                 NUM_BANKS(ADDR_SURF_8_BANK));
2547                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550                                 NUM_BANKS(ADDR_SURF_8_BANK));
2551                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2553                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554                                 NUM_BANKS(ADDR_SURF_8_BANK));
2555                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558                                 NUM_BANKS(ADDR_SURF_8_BANK));
2559                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562                                 NUM_BANKS(ADDR_SURF_8_BANK));
2563                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566                                 NUM_BANKS(ADDR_SURF_8_BANK));
2567                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2569                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2570                                 NUM_BANKS(ADDR_SURF_8_BANK));
2571                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2573                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2574                                 NUM_BANKS(ADDR_SURF_8_BANK));
2575                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                  NUM_BANKS(ADDR_SURF_8_BANK));
2579                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2582                                  NUM_BANKS(ADDR_SURF_8_BANK));
2583                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586                                  NUM_BANKS(ADDR_SURF_8_BANK));
2587                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590                                  NUM_BANKS(ADDR_SURF_8_BANK));
2591                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2594                                  NUM_BANKS(ADDR_SURF_4_BANK));
2595
2596                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2597                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2598
2599                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2600                         if (reg_offset != 7)
2601                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2602
2603                 break;
2604         case CHIP_TONGA:
                /*
                 * Tonga tile mode tables: entries use the 8-pipe
                 * ADDR_SURF_P8_32x32_16x16 pipe config; the duplicate PRT
                 * entries (modes 7, 12, 17, 23, 30) use ADDR_SURF_P4_16x16
                 * instead.
                 */
                /* Modes 0-7: depth micro tiling, tile split 64B..2KB. */
2605                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2612                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2620                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2624                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2625                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2626                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2628                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2629                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2632                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2633                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2636                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                /* Mode 8: linear aligned; no tile split / micro tile fields. */
2637                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2638                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                /* Modes 9-12: display micro tiling. */
2639                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2646                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2650                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2651                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2654                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                /* Modes 13-17: thin micro tiling. */
2655                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2656                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2657                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2661                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2664                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2665                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2666                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2669                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2670                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2671                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2672                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2674                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
                /* Modes 18-26: thick/xthick array modes, sample split 1. */
2675                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2676                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2680                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2681                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2685                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2686                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2688                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2692                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2693                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2696                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2698                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2699                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2700                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2701                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2702                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2703                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2704                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2705                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2706                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2707                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2708                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2709                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2710                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
                /* Modes 27-30: rotated micro tiling. */
2711                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2713                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2714                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2717                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2718                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2721                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2722                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2726                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727
                /*
                 * Macrotile (bank) parameters.  mod2array[7] is never
                 * assigned here and is deliberately skipped by the write
                 * loop below (reg_offset != 7).
                 */
2728                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2731                 NUM_BANKS(ADDR_SURF_16_BANK));
2732                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                 NUM_BANKS(ADDR_SURF_16_BANK));
2736                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739                 NUM_BANKS(ADDR_SURF_16_BANK));
2740                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743                 NUM_BANKS(ADDR_SURF_16_BANK));
2744                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2746                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                 NUM_BANKS(ADDR_SURF_16_BANK));
2748                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2751                 NUM_BANKS(ADDR_SURF_16_BANK));
2752                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2755                 NUM_BANKS(ADDR_SURF_16_BANK));
2756                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2758                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2759                 NUM_BANKS(ADDR_SURF_16_BANK));
2760                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763                 NUM_BANKS(ADDR_SURF_16_BANK));
2764                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2765                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2766                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2767                  NUM_BANKS(ADDR_SURF_16_BANK));
2768                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2771                  NUM_BANKS(ADDR_SURF_16_BANK));
2772                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2775                  NUM_BANKS(ADDR_SURF_8_BANK));
2776                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2779                  NUM_BANKS(ADDR_SURF_4_BANK));
2780                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2783                  NUM_BANKS(ADDR_SURF_4_BANK));
2784
                /* Program every GB_TILE_MODEn register from the table. */
2785                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2786                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2787
                /*
                 * Program the GB_MACROTILE_MODEn registers, skipping index 7
                 * (mod2array[7] was never initialized above).
                 */
2788                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2789                         if (reg_offset != 7)
2790                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2791
2792                 break;
2793         case CHIP_POLARIS11:
2794         case CHIP_POLARIS12:
2795                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2798                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2802                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2806                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2810                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2826                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2827                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2828                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2829                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2844                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2846                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2851                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2854                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2862                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2863                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2865                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2870                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2871                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2874                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2875                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2878                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2886                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2887                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2888                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2890                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2891                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2894                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2898                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2900                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2901                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917
2918                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921                                 NUM_BANKS(ADDR_SURF_16_BANK));
2922
2923                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926                                 NUM_BANKS(ADDR_SURF_16_BANK));
2927
2928                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2930                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2931                                 NUM_BANKS(ADDR_SURF_16_BANK));
2932
2933                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2935                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2936                                 NUM_BANKS(ADDR_SURF_16_BANK));
2937
2938                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2941                                 NUM_BANKS(ADDR_SURF_16_BANK));
2942
2943                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2945                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2946                                 NUM_BANKS(ADDR_SURF_16_BANK));
2947
2948                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2951                                 NUM_BANKS(ADDR_SURF_16_BANK));
2952
2953                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2954                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2955                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2956                                 NUM_BANKS(ADDR_SURF_16_BANK));
2957
2958                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2959                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2960                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961                                 NUM_BANKS(ADDR_SURF_16_BANK));
2962
2963                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966                                 NUM_BANKS(ADDR_SURF_16_BANK));
2967
2968                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                 NUM_BANKS(ADDR_SURF_16_BANK));
2972
2973                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2976                                 NUM_BANKS(ADDR_SURF_16_BANK));
2977
2978                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2980                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2981                                 NUM_BANKS(ADDR_SURF_8_BANK));
2982
2983                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2986                                 NUM_BANKS(ADDR_SURF_4_BANK));
2987
2988                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2989                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2990
                /* mod2array[7] is never initialized in this case body, so
                 * GB_MACROTILE_MODE7 is deliberately left unprogrammed rather
                 * than written with a stale/zero value.
                 */
2991                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2992                         if (reg_offset != 7)
2993                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2994
2995                 break;
        /* Polaris10: 8-pipe tables (every tiled mode below uses
         * PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16), with P4_16x16 only for the
         * PRT modes).
         */
2996         case CHIP_POLARIS10:
2997                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3024                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3025                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3028                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3029                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3030                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3031                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3035                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3038                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3042                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3043                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3046                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3047                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3048                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3051                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3053                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3055                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3056                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3057                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3058                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3059                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3061                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3062                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3063                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3064                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3065                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3066                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3067                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3068                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3069                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3070                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3072                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3073                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3074                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3076                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3077                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3080                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3081                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3084                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3085                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3086                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3087                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3088                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3089                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3090                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3091                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3092                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3093                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3094                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3095                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3096                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3097                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3098                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3099                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3100                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3101                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3102                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3103                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3106                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3110                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3114                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3116                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3118                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3119
3120                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3122                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3123                                 NUM_BANKS(ADDR_SURF_16_BANK));
3124
3125                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3126                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128                                 NUM_BANKS(ADDR_SURF_16_BANK));
3129
3130                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3131                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3132                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133                                 NUM_BANKS(ADDR_SURF_16_BANK));
3134
3135                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138                                 NUM_BANKS(ADDR_SURF_16_BANK));
3139
3140                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3143                                 NUM_BANKS(ADDR_SURF_16_BANK));
3144
3145                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3146                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3147                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3148                                 NUM_BANKS(ADDR_SURF_16_BANK));
3149
3150                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3153                                 NUM_BANKS(ADDR_SURF_16_BANK));
3154
3155                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158                                 NUM_BANKS(ADDR_SURF_16_BANK));
3159
3160                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163                                 NUM_BANKS(ADDR_SURF_16_BANK));
3164
3165                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3167                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3168                                 NUM_BANKS(ADDR_SURF_16_BANK));
3169
3170                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173                                 NUM_BANKS(ADDR_SURF_16_BANK));
3174
3175                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3178                                 NUM_BANKS(ADDR_SURF_8_BANK));
3179
3180                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3183                                 NUM_BANKS(ADDR_SURF_4_BANK));
3184
3185                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3188                                 NUM_BANKS(ADDR_SURF_4_BANK));
3189
3190                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3191                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3192
                /* skip GB_MACROTILE_MODE7: mod2array[7] is never set for this
                 * ASIC, so there is nothing valid to write there.
                 */
3193                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3194                         if (reg_offset != 7)
3195                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3196
3197                 break;
        /* Stoney: 2-pipe tables (PIPE_CONFIG(ADDR_SURF_P2) throughout).
         * Note that modearray slots 7, 12, 17 and 23 are intentionally left
         * unassigned here and are skipped in the write-back loop below.
         */
3198         case CHIP_STONEY:
3199                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                 PIPE_CONFIG(ADDR_SURF_P2) |
3201                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3202                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3203                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                 PIPE_CONFIG(ADDR_SURF_P2) |
3205                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3206                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3207                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208                                 PIPE_CONFIG(ADDR_SURF_P2) |
3209                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3211                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3212                                 PIPE_CONFIG(ADDR_SURF_P2) |
3213                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3215                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216                                 PIPE_CONFIG(ADDR_SURF_P2) |
3217                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3218                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3219                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3220                                 PIPE_CONFIG(ADDR_SURF_P2) |
3221                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3222                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3223                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224                                 PIPE_CONFIG(ADDR_SURF_P2) |
3225                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3228                                 PIPE_CONFIG(ADDR_SURF_P2));
3229                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3230                                 PIPE_CONFIG(ADDR_SURF_P2) |
3231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3232                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234                                  PIPE_CONFIG(ADDR_SURF_P2) |
3235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3237                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                  PIPE_CONFIG(ADDR_SURF_P2) |
3239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3241                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3242                                  PIPE_CONFIG(ADDR_SURF_P2) |
3243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246                                  PIPE_CONFIG(ADDR_SURF_P2) |
3247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3249                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3250                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254                                  PIPE_CONFIG(ADDR_SURF_P2) |
3255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3257                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3258                                  PIPE_CONFIG(ADDR_SURF_P2) |
3259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3261                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3262                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3265                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3266                                  PIPE_CONFIG(ADDR_SURF_P2) |
3267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3269                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3270                                  PIPE_CONFIG(ADDR_SURF_P2) |
3271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3273                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3274                                  PIPE_CONFIG(ADDR_SURF_P2) |
3275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3277                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3278                                  PIPE_CONFIG(ADDR_SURF_P2) |
3279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3281                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3282                                  PIPE_CONFIG(ADDR_SURF_P2) |
3283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3286                                  PIPE_CONFIG(ADDR_SURF_P2) |
3287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3290                                  PIPE_CONFIG(ADDR_SURF_P2) |
3291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294                                  PIPE_CONFIG(ADDR_SURF_P2) |
3295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3298                                  PIPE_CONFIG(ADDR_SURF_P2) |
3299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3301
3302                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3303                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305                                 NUM_BANKS(ADDR_SURF_8_BANK));
3306                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3307                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309                                 NUM_BANKS(ADDR_SURF_8_BANK));
3310                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313                                 NUM_BANKS(ADDR_SURF_8_BANK));
3314                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3317                                 NUM_BANKS(ADDR_SURF_8_BANK));
3318                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321                                 NUM_BANKS(ADDR_SURF_8_BANK));
3322                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325                                 NUM_BANKS(ADDR_SURF_8_BANK));
3326                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3328                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3329                                 NUM_BANKS(ADDR_SURF_8_BANK));
3330                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3331                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3332                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3333                                 NUM_BANKS(ADDR_SURF_16_BANK));
3334                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337                                 NUM_BANKS(ADDR_SURF_16_BANK));
3338                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3339                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3340                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3341                                  NUM_BANKS(ADDR_SURF_16_BANK));
3342                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3343                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3344                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3345                                  NUM_BANKS(ADDR_SURF_16_BANK));
3346                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3347                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3348                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349                                  NUM_BANKS(ADDR_SURF_16_BANK));
3350                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3351                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3352                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3353                                  NUM_BANKS(ADDR_SURF_16_BANK));
3354                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3356                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3357                                  NUM_BANKS(ADDR_SURF_8_BANK));
3358
                /* Tile modes 7, 12, 17 and 23 were never initialized in
                 * modearray for this ASIC (see the case body above), so the
                 * corresponding GB_TILE_MODE registers are left untouched.
                 */
3359                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3360                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3361                             reg_offset != 23)
3362                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3363
                /* likewise mod2array[7] is never set, so skip
                 * GB_MACROTILE_MODE7.
                 */
3364                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3365                         if (reg_offset != 7)
3366                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3367
3368                 break;
3369         default:
3370                 dev_warn(adev->dev,
3371                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3372                          adev->asic_type);
3373
                /* fall through - unknown ASICs intentionally reuse the
                 * CHIP_CARRIZO tables, as announced by the warning above.
                 */
3374         case CHIP_CARRIZO:
3375                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376                                 PIPE_CONFIG(ADDR_SURF_P2) |
3377                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3378                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3379                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380                                 PIPE_CONFIG(ADDR_SURF_P2) |
3381                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3382                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3383                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384                                 PIPE_CONFIG(ADDR_SURF_P2) |
3385                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3386                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3387                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3388                                 PIPE_CONFIG(ADDR_SURF_P2) |
3389                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3391                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3392                                 PIPE_CONFIG(ADDR_SURF_P2) |
3393                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3395                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3396                                 PIPE_CONFIG(ADDR_SURF_P2) |
3397                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3398                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3399                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400                                 PIPE_CONFIG(ADDR_SURF_P2) |
3401                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3403                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3404                                 PIPE_CONFIG(ADDR_SURF_P2));
3405                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3406                                 PIPE_CONFIG(ADDR_SURF_P2) |
3407                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3408                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3409                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410                                  PIPE_CONFIG(ADDR_SURF_P2) |
3411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3413                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414                                  PIPE_CONFIG(ADDR_SURF_P2) |
3415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3417                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3418                                  PIPE_CONFIG(ADDR_SURF_P2) |
3419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3421                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3422                                  PIPE_CONFIG(ADDR_SURF_P2) |
3423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3425                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3426                                  PIPE_CONFIG(ADDR_SURF_P2) |
3427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3429                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3430                                  PIPE_CONFIG(ADDR_SURF_P2) |
3431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3433                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3434                                  PIPE_CONFIG(ADDR_SURF_P2) |
3435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3437                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3438                                  PIPE_CONFIG(ADDR_SURF_P2) |
3439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3441                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3442                                  PIPE_CONFIG(ADDR_SURF_P2) |
3443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3445                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3446                                  PIPE_CONFIG(ADDR_SURF_P2) |
3447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3449                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3450                                  PIPE_CONFIG(ADDR_SURF_P2) |
3451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3453                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3454                                  PIPE_CONFIG(ADDR_SURF_P2) |
3455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3457                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3458                                  PIPE_CONFIG(ADDR_SURF_P2) |
3459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3461                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3462                                  PIPE_CONFIG(ADDR_SURF_P2) |
3463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3465                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3466                                  PIPE_CONFIG(ADDR_SURF_P2) |
3467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3469                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3470                                  PIPE_CONFIG(ADDR_SURF_P2) |
3471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3473                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3474                                  PIPE_CONFIG(ADDR_SURF_P2) |
3475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3477
3478                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3479                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3480                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3481                                 NUM_BANKS(ADDR_SURF_8_BANK));
3482                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3483                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3484                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3485                                 NUM_BANKS(ADDR_SURF_8_BANK));
3486                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3487                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3488                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3489                                 NUM_BANKS(ADDR_SURF_8_BANK));
3490                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3491                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3492                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3493                                 NUM_BANKS(ADDR_SURF_8_BANK));
3494                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3495                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3496                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3497                                 NUM_BANKS(ADDR_SURF_8_BANK));
3498                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3499                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3500                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3501                                 NUM_BANKS(ADDR_SURF_8_BANK));
3502                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3503                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3504                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3505                                 NUM_BANKS(ADDR_SURF_8_BANK));
3506                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3509                                 NUM_BANKS(ADDR_SURF_16_BANK));
3510                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3511                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3512                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3513                                 NUM_BANKS(ADDR_SURF_16_BANK));
3514                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3515                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3516                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3517                                  NUM_BANKS(ADDR_SURF_16_BANK));
3518                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3519                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3520                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3521                                  NUM_BANKS(ADDR_SURF_16_BANK));
3522                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3523                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3524                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3525                                  NUM_BANKS(ADDR_SURF_16_BANK));
3526                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3527                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3528                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3529                                  NUM_BANKS(ADDR_SURF_16_BANK));
3530                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3531                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3532                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3533                                  NUM_BANKS(ADDR_SURF_8_BANK));
3534
3535                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3536                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3537                             reg_offset != 23)
3538                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3539
3540                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3541                         if (reg_offset != 7)
3542                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3543
3544                 break;
3545         }
3546 }
3547
3548 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3549                                   u32 se_num, u32 sh_num, u32 instance)
3550 {
3551         u32 data;
3552
3553         if (instance == 0xffffffff)
3554                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3555         else
3556                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3557
3558         if (se_num == 0xffffffff)
3559                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3560         else
3561                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3562
3563         if (sh_num == 0xffffffff)
3564                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3565         else
3566                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3567
3568         WREG32(mmGRBM_GFX_INDEX, data);
3569 }
3570
/* Build a mask with the low @bit_width bits set (bit_width may be 0..32). */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	u32 mask = 0;

	while (bit_width--)
		mask = (mask << 1) | 1;

	return mask;
}
3575
3576 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3577 {
3578         u32 data, mask;
3579
3580         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3581                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3582
3583         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3584
3585         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3586                                        adev->gfx.config.max_sh_per_se);
3587
3588         return (~data) & mask;
3589 }
3590
3591 static void
3592 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3593 {
3594         switch (adev->asic_type) {
3595         case CHIP_FIJI:
3596                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3597                           RB_XSEL2(1) | PKR_MAP(2) |
3598                           PKR_XSEL(1) | PKR_YSEL(1) |
3599                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3600                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3601                            SE_PAIR_YSEL(2);
3602                 break;
3603         case CHIP_TONGA:
3604         case CHIP_POLARIS10:
3605                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3606                           SE_XSEL(1) | SE_YSEL(1);
3607                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3608                            SE_PAIR_YSEL(2);
3609                 break;
3610         case CHIP_TOPAZ:
3611         case CHIP_CARRIZO:
3612                 *rconf |= RB_MAP_PKR0(2);
3613                 *rconf1 |= 0x0;
3614                 break;
3615         case CHIP_POLARIS11:
3616         case CHIP_POLARIS12:
3617                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3618                           SE_XSEL(1) | SE_YSEL(1);
3619                 *rconf1 |= 0x0;
3620                 break;
3621         case CHIP_STONEY:
3622                 *rconf |= 0x0;
3623                 *rconf1 |= 0x0;
3624                 break;
3625         default:
3626                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3627                 break;
3628         }
3629 }
3630
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config per SE
 *
 * @adev: amdgpu_device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value for a full chip
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value for a full chip
 * @rb_mask: bitmap of render backends that are actually enabled
 * @num_rb: number of RBs a fully populated chip would have
 *
 * On harvested parts some render backends are fused off.  For each shader
 * engine this rewrites the SE/PKR/RB mapping fields of the raster config
 * so that no field points at a disabled backend, then writes the adjusted
 * value while GRBM is steered at that SE.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice rb_mask into one contiguous chunk of rb_per_se bits per SE */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* the mapping code below only handles these topologies */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one whole SE pair has no live RBs, point SE_PAIR_MAP away
	 * from it */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx selects the SE pair this SE belongs to */
		int idx = (se / 2) * 2;

		/* within the pair, route SE_MAP away from a dead SE */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* route PKR_MAP away from a packer whose RBs are all off */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* finally fix up the RB maps inside each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3739
/*
 * gfx_v8_0_setup_rb - discover and program the active render backends
 *
 * @adev: amdgpu_device pointer
 *
 * Builds a chip-wide bitmap of the render backends that are enabled on
 * this (possibly harvested) part, programs PA_SC_RASTER_CONFIG(_1)
 * accordingly and caches the per-SE/SH register values for userspace
 * queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* probe every SE/SH pair and pack its active-RB bits into one
	 * chip-wide bitmap */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* unharvested parts can take the baseline config broadcast to all
	 * SEs; harvested parts need the mapping rewritten per SE */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3796
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES    (0x6000)
/* VMIDs 8..15 are reserved for compute (HSA) use */
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for the private and shared apertures */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default MTYPE, private aperture translated by the ATC */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base(1) > limit(0) — presumably leaves the APE1
		 * aperture disabled; TODO confirm against the SH_MEM spec */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore SRBM to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3841
3842 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3843 {
3844         u32 tmp;
3845         int i;
3846
3847         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3848         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3849         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3850         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3851
3852         gfx_v8_0_tiling_mode_table_init(adev);
3853         gfx_v8_0_setup_rb(adev);
3854         gfx_v8_0_get_cu_info(adev);
3855
3856         /* XXX SH_MEM regs */
3857         /* where to put LDS, scratch, GPUVM in FSA64 space */
3858         mutex_lock(&adev->srbm_mutex);
3859         for (i = 0; i < 16; i++) {
3860                 vi_srbm_select(adev, 0, 0, 0, i);
3861                 /* CP and shaders */
3862                 if (i == 0) {
3863                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3864                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3865                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3866                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3867                         WREG32(mmSH_MEM_CONFIG, tmp);
3868                 } else {
3869                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3870                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3871                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3872                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3873                         WREG32(mmSH_MEM_CONFIG, tmp);
3874                 }
3875
3876                 WREG32(mmSH_MEM_APE1_BASE, 1);
3877                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3878                 WREG32(mmSH_MEM_BASES, 0);
3879         }
3880         vi_srbm_select(adev, 0, 0, 0, 0);
3881         mutex_unlock(&adev->srbm_mutex);
3882
3883         gfx_v8_0_init_compute_vmid(adev);
3884
3885         mutex_lock(&adev->grbm_idx_mutex);
3886         /*
3887          * making sure that the following register writes will be broadcasted
3888          * to all the shaders
3889          */
3890         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3891
3892         WREG32(mmPA_SC_FIFO_SIZE,
3893                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3894                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3895                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3896                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3897                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3898                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3899                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3900                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3901
3902         tmp = RREG32(mmSPI_ARB_PRIORITY);
3903         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3904         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3905         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3906         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3907         WREG32(mmSPI_ARB_PRIORITY, tmp);
3908
3909         mutex_unlock(&adev->grbm_idx_mutex);
3910
3911 }
3912
3913 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3914 {
3915         u32 i, j, k;
3916         u32 mask;
3917
3918         mutex_lock(&adev->grbm_idx_mutex);
3919         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3920                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3921                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3922                         for (k = 0; k < adev->usec_timeout; k++) {
3923                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3924                                         break;
3925                                 udelay(1);
3926                         }
3927                 }
3928         }
3929         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3930         mutex_unlock(&adev->grbm_idx_mutex);
3931
3932         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3933                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3934                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3935                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3936         for (k = 0; k < adev->usec_timeout; k++) {
3937                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3938                         break;
3939                 udelay(1);
3940         }
3941 }
3942
3943 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3944                                                bool enable)
3945 {
3946         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3947
3948         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3949         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3950         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3951         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3952
3953         WREG32(mmCP_INT_CNTL_RING0, tmp);
3954 }
3955
3956 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3957 {
3958         /* csib */
3959         WREG32(mmRLC_CSIB_ADDR_HI,
3960                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3961         WREG32(mmRLC_CSIB_ADDR_LO,
3962                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3963         WREG32(mmRLC_CSIB_LENGTH,
3964                         adev->gfx.rlc.clear_state_size);
3965 }
3966
/*
 * gfx_v8_0_parse_ind_reg_list - compress an RLC indirect register list
 *
 * @register_list_format: firmware list of {word, word, index} triples with
 *                        0xFFFFFFFF words separating entries; the index word
 *                        of each triple is rewritten in place to its position
 *                        in @unique_indices
 * @ind_offset: offset in the list at which parsing starts
 * @list_size: number of words in the list
 * @unique_indices: output table of distinct index values encountered
 * @indices_count: in/out number of entries used in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: output table of each entry's starting offset
 * @offset_count: in/out number of entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * Fix vs. the previous version: the BUG_ON capacity checks ran AFTER the
 * table write and increment, so an overflowing write landed out of bounds
 * before the BUG fired, and filling a table to exactly its capacity
 * panicked spuriously.  The checks now run before each write.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* check capacity BEFORE writing */
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		/* 0xFFFFFFFF terminates the current entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the index word (third word of the triple) */
		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* new index value: check capacity BEFORE writing */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the raw index with its table position */
		register_list_format[ind_offset] = indices;
	}
}
4016
4017 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4018 {
4019         int i, temp, data;
4020         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4021         int indices_count = 0;
4022         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4023         int offset_count = 0;
4024
4025         int list_size;
4026         unsigned int *register_list_format =
4027                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4028         if (!register_list_format)
4029                 return -ENOMEM;
4030         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4031                         adev->gfx.rlc.reg_list_format_size_bytes);
4032
4033         gfx_v8_0_parse_ind_reg_list(register_list_format,
4034                                 RLC_FormatDirectRegListLength,
4035                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4036                                 unique_indices,
4037                                 &indices_count,
4038                                 sizeof(unique_indices) / sizeof(int),
4039                                 indirect_start_offsets,
4040                                 &offset_count,
4041                                 sizeof(indirect_start_offsets)/sizeof(int));
4042
4043         /* save and restore list */
4044         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4045
4046         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4047         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4048                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4049
4050         /* indirect list */
4051         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4052         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4053                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4054
4055         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4056         list_size = list_size >> 1;
4057         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4058         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4059
4060         /* starting offsets starts */
4061         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4062                 adev->gfx.rlc.starting_offsets_start);
4063         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
4064                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4065                                 indirect_start_offsets[i]);
4066
4067         /* unique indices */
4068         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4069         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4070         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
4071                 if (unique_indices[i] != 0) {
4072                         amdgpu_mm_wreg(adev, temp + i,
4073                                         unique_indices[i] & 0x3FFFF, false);
4074                         amdgpu_mm_wreg(adev, data + i,
4075                                         unique_indices[i] >> 20, false);
4076                 }
4077         }
4078         kfree(register_list_format);
4079
4080         return 0;
4081 }
4082
/* Turn on the RLC save/restore machine (SRM); the save/restore list is
 * expected to have been programmed first (gfx_v8_0_init_save_restore_list()).
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4087
/* Program the RLC power-gating timing parameters (delay values are raw
 * register fields; units are hardware-specific).
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	/* write-pointer polling idle count */
	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* power up/down, command propagation and memory-sleep delays */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	/* gfx-idle threshold before GRBM register state is saved */
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4104
/* Enable/disable SMU clock slowdown while a block is powering up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4110
/* Enable/disable SMU clock slowdown while a block is powering down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4116
/* Enable/disable CP power gating; note the register field is a *disable*
 * bit, hence the inverted value.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4121
4122 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4123 {
4124         if ((adev->asic_type == CHIP_CARRIZO) ||
4125             (adev->asic_type == CHIP_STONEY)) {
4126                 gfx_v8_0_init_csb(adev);
4127                 gfx_v8_0_init_save_restore_list(adev);
4128                 gfx_v8_0_enable_save_restore_machine(adev);
4129                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4130                 gfx_v8_0_init_power_gating(adev);
4131                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4132         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4133                    (adev->asic_type == CHIP_POLARIS12)) {
4134                 gfx_v8_0_init_csb(adev);
4135                 gfx_v8_0_init_save_restore_list(adev);
4136                 gfx_v8_0_enable_save_restore_machine(adev);
4137                 gfx_v8_0_init_power_gating(adev);
4138         }
4139
4140 }
4141
/* Halt the RLC F32 core, mask the GUI-idle interrupt, and wait for the
 * RLC serdes to become idle before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4149
/* Pulse the GRBM soft reset for the RLC, with a settle delay around
 * each edge of the toggle.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4158
/* Re-enable the RLC F32 core and (on dGPUs only) the GUI-idle interrupt. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4169
4170 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4171 {
4172         const struct rlc_firmware_header_v2_0 *hdr;
4173         const __le32 *fw_data;
4174         unsigned i, fw_size;
4175
4176         if (!adev->gfx.rlc_fw)
4177                 return -EINVAL;
4178
4179         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4180         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4181
4182         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4183                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4184         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4185
4186         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4187         for (i = 0; i < fw_size; i++)
4188                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4189         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4190
4191         return 0;
4192 }
4193
/*
 * Full RLC (re)initialization sequence: stop the RLC, disable clock
 * gating and power gating, soft-reset it, set up power gating, load
 * microcode if required, then start it again.  Returns 0 on success or
 * a negative error code if microcode loading fails.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	/* Polaris carries an additional 3D variant of the CGCG/CGLS control */
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; only wait for completion */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4238
4239 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4240 {
4241         int i;
4242         u32 tmp = RREG32(mmCP_ME_CNTL);
4243
4244         if (enable) {
4245                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4246                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4247                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4248         } else {
4249                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4250                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4251                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4252                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4253                         adev->gfx.gfx_ring[i].ready = false;
4254         }
4255         WREG32(mmCP_ME_CNTL, tmp);
4256         udelay(50);
4257 }
4258
4259 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4260 {
4261         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4262         const struct gfx_firmware_header_v1_0 *ce_hdr;
4263         const struct gfx_firmware_header_v1_0 *me_hdr;
4264         const __le32 *fw_data;
4265         unsigned i, fw_size;
4266
4267         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4268                 return -EINVAL;
4269
4270         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4271                 adev->gfx.pfp_fw->data;
4272         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4273                 adev->gfx.ce_fw->data;
4274         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4275                 adev->gfx.me_fw->data;
4276
4277         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4278         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4279         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4280
4281         gfx_v8_0_cp_gfx_enable(adev, false);
4282
4283         /* PFP */
4284         fw_data = (const __le32 *)
4285                 (adev->gfx.pfp_fw->data +
4286                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4287         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4288         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4289         for (i = 0; i < fw_size; i++)
4290                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4291         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4292
4293         /* CE */
4294         fw_data = (const __le32 *)
4295                 (adev->gfx.ce_fw->data +
4296                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4297         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4298         WREG32(mmCP_CE_UCODE_ADDR, 0);
4299         for (i = 0; i < fw_size; i++)
4300                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4301         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4302
4303         /* ME */
4304         fw_data = (const __le32 *)
4305                 (adev->gfx.me_fw->data +
4306                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4307         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4308         WREG32(mmCP_ME_RAM_WADDR, 0);
4309         for (i = 0; i < fw_size; i++)
4310                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4311         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4312
4313         return 0;
4314 }
4315
4316 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4317 {
4318         u32 count = 0;
4319         const struct cs_section_def *sect = NULL;
4320         const struct cs_extent_def *ext = NULL;
4321
4322         /* begin clear state */
4323         count += 2;
4324         /* context control state */
4325         count += 3;
4326
4327         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4328                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4329                         if (sect->id == SECT_CONTEXT)
4330                                 count += 2 + ext->reg_count;
4331                         else
4332                                 return 0;
4333                 }
4334         }
4335         /* pa_sc_raster_config/pa_sc_raster_config1 */
4336         count += 4;
4337         /* end clear state */
4338         count += 2;
4339         /* clear state */
4340         count += 2;
4341
4342         return count;
4343 }
4344
/*
 * Initialize the graphics CP and emit the clear-state sequence on gfx
 * ring 0: preamble, context control, the SECT_CONTEXT extents from the
 * VI clear-state table, per-ASIC raster config, and the CE partition
 * bases.  Returns 0 on success or the ring-allocation error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* reserve space for the clear-state packets plus 4 extra dwords */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent from the VI clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* raster config depends on the number of render backends */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4437
/*
 * Program gfx ring buffer 0 (size, read/write pointers, writeback
 * addresses, base, doorbell) and start it, then run a ring test.
 * Returns the ring-test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (register field is log2 of the size in qwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* restore CNTL without the RPTR write-enable bit after a short settle */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally restricts the valid doorbell range */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4521
4522 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4523 {
4524         int i;
4525
4526         if (enable) {
4527                 WREG32(mmCP_MEC_CNTL, 0);
4528         } else {
4529                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4530                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4531                         adev->gfx.compute_ring[i].ready = false;
4532         }
4533         udelay(50);
4534 }
4535
4536 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4537 {
4538         const struct gfx_firmware_header_v1_0 *mec_hdr;
4539         const __le32 *fw_data;
4540         unsigned i, fw_size;
4541
4542         if (!adev->gfx.mec_fw)
4543                 return -EINVAL;
4544
4545         gfx_v8_0_cp_compute_enable(adev, false);
4546
4547         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4548         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4549
4550         fw_data = (const __le32 *)
4551                 (adev->gfx.mec_fw->data +
4552                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4553         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4554
4555         /* MEC1 */
4556         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4557         for (i = 0; i < fw_size; i++)
4558                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4559         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4560
4561         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4562         if (adev->gfx.mec2_fw) {
4563                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4564
4565                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4566                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4567
4568                 fw_data = (const __le32 *)
4569                         (adev->gfx.mec2_fw->data +
4570                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4571                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4572
4573                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4574                 for (i = 0; i < fw_size; i++)
4575                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4576                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4577         }
4578
4579         return 0;
4580 }
4581
4582 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4583 {
4584         int i, r;
4585
4586         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4587                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4588
4589                 if (ring->mqd_obj) {
4590                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4591                         if (unlikely(r != 0))
4592                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4593
4594                         amdgpu_bo_unpin(ring->mqd_obj);
4595                         amdgpu_bo_unreserve(ring->mqd_obj);
4596
4597                         amdgpu_bo_unref(&ring->mqd_obj);
4598                         ring->mqd_obj = NULL;
4599                 }
4600         }
4601 }
4602
4603 /* KIQ functions */
/* Register this ring as the KIQ (kernel interface queue) with the RLC.
 * The queue id (me/pipe/queue) is packed into the low byte of
 * RLC_CP_SCHEDULERS.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write with bit 7 set — presumably latches/activates the
	 * selection; NOTE(review): confirm against RLC programming docs */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4617
/* Emit a SET_RESOURCES packet on the KIQ ring, granting it the first
 * eight queues (mask 0xFF) and no GWS/OAC/GDS resources.
 */
static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
{
	/* NOTE(review): amdgpu_ring_alloc() return value is ignored; a
	 * failed allocation would make the writes below invalid — verify */
	amdgpu_ring_alloc(ring, 8);
	/* set resources */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(ring, 0);	/* queue mask hi */
	amdgpu_ring_write(ring, 0);	/* gws mask lo */
	amdgpu_ring_write(ring, 0);	/* gws mask hi */
	amdgpu_ring_write(ring, 0);	/* oac mask */
	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
	amdgpu_ring_commit(ring);
	udelay(50);
}
4633
/* Ask the KIQ to map a compute queue: emit a MAP_QUEUES packet pointing
 * at the target ring's MQD and write-pointer writeback address.
 */
static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	/* NOTE(review): amdgpu_ring_alloc() return value is ignored — verify */
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, 0x21010000);
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
			(ring->queue << 26) |
			(ring->pipe << 29) |
			((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
	udelay(50);
}
4658
/*
 * Fill a vi_mqd (memory queue descriptor) for a compute ring and mark it
 * active.  Only the CPU-side structure is written here; the values are
 * committed to the HQD registers separately (see
 * gfx_v8_0_kiq_init_register()).  Several fields start from the current
 * register contents (RREG32) so unrelated bits are preserved.
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
			     struct vi_mqd *mqd,
			     uint64_t mqd_gpu_addr,
			     uint64_t eop_gpu_addr,
			     struct amdgpu_ring *ring)
{
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	/* EOP buffer base, 256-byte aligned */
	eop_base_addr = eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4778
/*
 * gfx_v8_0_kiq_init_register() - mirror a prepared MQD into the HQD registers
 * @adev: amdgpu device
 * @mqd:  CPU copy of the MQD whose fields are written to the CP_HQD_* registers
 * @ring: the KIQ ring being brought up
 *
 * Caller must hold srbm_mutex with this ring's me/pipe/queue selected
 * (see gfx_v8_0_kiq_init_queue()), since CP_HQD_* registers are
 * per-queue and reached through the SRBM selection.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
                                      struct vi_mqd *mqd,
                                      struct amdgpu_ring *ring)
{
        uint32_t tmp;
        int j;

        /* disable wptr polling */
        tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
        WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

        /* program the EOP buffer address */
        WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
        WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

        /* enable doorbell? */
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

        /* disable the queue if it's active: request dequeue, then poll
         * CP_HQD_ACTIVE for up to adev->usec_timeout microseconds */
        if (RREG32(mmCP_HQD_ACTIVE) & 1) {
                WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
                for (j = 0; j < adev->usec_timeout; j++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
                WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
                WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
        }

        /* set the pointer to the MQD */
        WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
        WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

        /* set MQD vmid to 0 */
        WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
        WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

        /* set the wb address whether it's enabled or not */
        WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
                                mqd->cp_hqd_pq_rptr_report_addr_lo);
        WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                                mqd->cp_hqd_pq_rptr_report_addr_hi);

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
        WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

        /* enable the doorbell if requested; on CZ/Fiji/Stoney also set the
         * MEC doorbell aperture (KIQ .. MEC ring 7) */
        if (ring->use_doorbell) {
                if ((adev->asic_type == CHIP_CARRIZO) ||
                                (adev->asic_type == CHIP_FIJI) ||
                                (adev->asic_type == CHIP_STONEY)) {
                        WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
                                                AMDGPU_DOORBELL_KIQ << 2);
                        WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
                                                AMDGPU_DOORBELL_MEC_RING7 << 2);
                }
        }
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

        /* set the vmid for the queue */
        WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

        WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

        /* activate the queue */
        WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

        /* globally enable doorbell processing for the PQ */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_PQ_STATUS);
                tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
                WREG32(mmCP_PQ_STATUS, tmp);
        }

        return 0;
}
4869
4870 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4871                                    struct vi_mqd *mqd,
4872                                    u64 mqd_gpu_addr)
4873 {
4874         struct amdgpu_device *adev = ring->adev;
4875         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4876         uint64_t eop_gpu_addr;
4877         bool is_kiq = false;
4878
4879         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4880                 is_kiq = true;
4881
4882         if (is_kiq) {
4883                 eop_gpu_addr = kiq->eop_gpu_addr;
4884                 gfx_v8_0_kiq_setting(&kiq->ring);
4885         } else
4886                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4887                                         ring->queue * MEC_HPD_SIZE;
4888
4889         mutex_lock(&adev->srbm_mutex);
4890         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4891
4892         gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4893
4894         if (is_kiq)
4895                 gfx_v8_0_kiq_init_register(adev, mqd, ring);
4896
4897         vi_srbm_select(adev, 0, 0, 0, 0);
4898         mutex_unlock(&adev->srbm_mutex);
4899
4900         if (is_kiq)
4901                 gfx_v8_0_kiq_enable(ring);
4902         else
4903                 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4904
4905         return 0;
4906 }
4907
4908 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
4909 {
4910         struct amdgpu_ring *ring = NULL;
4911         int i;
4912
4913         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4914                 ring = &adev->gfx.compute_ring[i];
4915                 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4916                 ring->mqd_obj = NULL;
4917         }
4918
4919         ring = &adev->gfx.kiq.ring;
4920         amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4921         ring->mqd_obj = NULL;
4922 }
4923
4924 static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
4925                                     struct amdgpu_ring *ring)
4926 {
4927         struct vi_mqd *mqd;
4928         u64 mqd_gpu_addr;
4929         u32 *buf;
4930         int r = 0;
4931
4932         r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
4933                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
4934                                     &mqd_gpu_addr, (void **)&buf);
4935         if (r) {
4936                 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
4937                 return r;
4938         }
4939
4940         /* init the mqd struct */
4941         memset(buf, 0, sizeof(struct vi_mqd));
4942         mqd = (struct vi_mqd *)buf;
4943
4944         r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
4945         if (r)
4946                 return r;
4947
4948         amdgpu_bo_kunmap(ring->mqd_obj);
4949
4950         return 0;
4951 }
4952
4953 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4954 {
4955         struct amdgpu_ring *ring = NULL;
4956         int r, i;
4957
4958         ring = &adev->gfx.kiq.ring;
4959         r = gfx_v8_0_kiq_setup_queue(adev, ring);
4960         if (r)
4961                 return r;
4962
4963         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4964                 ring = &adev->gfx.compute_ring[i];
4965                 r = gfx_v8_0_kiq_setup_queue(adev, ring);
4966                 if (r)
4967                         return r;
4968         }
4969
4970         gfx_v8_0_cp_compute_enable(adev, true);
4971
4972         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4973                 ring = &adev->gfx.compute_ring[i];
4974
4975                 ring->ready = true;
4976                 r = amdgpu_ring_test_ring(ring);
4977                 if (r)
4978                         ring->ready = false;
4979         }
4980
4981         ring = &adev->gfx.kiq.ring;
4982         ring->ready = true;
4983         r = amdgpu_ring_test_ring(ring);
4984         if (r)
4985                 ring->ready = false;
4986
4987         return 0;
4988 }
4989
/*
 * gfx_v8_0_cp_compute_resume() - bring up all compute queues (bare-metal path)
 * @adev: amdgpu device
 *
 * For each compute ring: lazily create, pin and map its MQD BO, build the
 * MQD in place while the ring's me/pipe/queue is selected through SRBM,
 * mirror each MQD field into the matching CP_HQD_* register, and activate
 * the queue.  Afterwards the compute CP is enabled and every ring is
 * sanity-tested; a failed test only clears that ring's ready flag.
 *
 * Return: 0 on success, negative error code on BO create/pin/map failure
 * (compute resources are torn down via gfx_v8_0_cp_compute_fini() first).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
        int r, i, j;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct vi_mqd *mqd;

        /* init the queues.  */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                /* the MQD BO persists across suspend/resume; allocate only once */
                if (ring->mqd_obj == NULL) {
                        r = amdgpu_bo_create(adev,
                                             sizeof(struct vi_mqd),
                                             PAGE_SIZE, true,
                                             AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &ring->mqd_obj);
                        if (r) {
                                dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0)) {
                        gfx_v8_0_cp_compute_fini(adev);
                        return r;
                }
                r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
                        gfx_v8_0_cp_compute_fini(adev);
                        return r;
                }
                r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
                        gfx_v8_0_cp_compute_fini(adev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct vi_mqd));

                mqd = (struct vi_mqd *)buf;
                mqd->header = 0xC0310800;
                mqd->compute_pipelinestat_enable = 0x00000001;
                /* enable all threads across all SEs for static thread mgmt */
                mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
                mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
                mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
                mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
                mqd->compute_misc_reserved = 0x00000003;

                /* per-queue registers are reached via SRBM selection */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me,
                               ring->pipe,
                               ring->queue, 0);

                /* ring i uses the i-th slice of the shared MEC HPD area */
                eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
                eop_gpu_addr >>= 8;

                /* write the EOP addr */
                WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
                WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

                /* set the VMID assigned */
                WREG32(mmCP_HQD_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(mmCP_HQD_EOP_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                                    (order_base_2(MEC_HPD_SIZE / 4) - 1));
                WREG32(mmCP_HQD_EOP_CONTROL, tmp);

                /* disable wptr polling */
                tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
                tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
                WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

                /* read the programmed EOP address back into the MQD copy */
                mqd->cp_hqd_eop_base_addr_lo =
                        RREG32(mmCP_HQD_EOP_BASE_ADDR);
                mqd->cp_hqd_eop_base_addr_hi =
                        RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

                /* enable doorbell? */
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell) {
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
                } else {
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
                }
                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
                mqd->cp_hqd_pq_doorbell_control = tmp;

                /* disable the queue if it's active: request dequeue and poll
                 * CP_HQD_ACTIVE for up to adev->usec_timeout microseconds */
                mqd->cp_hqd_dequeue_request = 0;
                mqd->cp_hqd_pq_rptr = 0;
                mqd->cp_hqd_pq_wptr= 0;
                if (RREG32(mmCP_HQD_ACTIVE) & 1) {
                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
                        for (j = 0; j < adev->usec_timeout; j++) {
                                if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
                        WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
                        WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
                mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
                WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

                /* set MQD vmid to 0 */
                tmp = RREG32(mmCP_MQD_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
                WREG32(mmCP_MQD_CONTROL, tmp);
                mqd->cp_mqd_control = tmp;

                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
                hqd_gpu_addr = ring->gpu_addr >> 8;
                mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
                mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
                WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                tmp = RREG32(mmCP_HQD_PQ_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                                    (order_base_2(ring->ring_size / 4) - 1));
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                               ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
                WREG32(mmCP_HQD_PQ_CONTROL, tmp);
                mqd->cp_hqd_pq_control = tmp;

                /* set the wb address whether it's enabled or not */
                wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
                mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
                mqd->cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->cp_hqd_pq_rptr_report_addr_lo);
                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->cp_hqd_pq_rptr_report_addr_hi);

                /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
                wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
                mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
                mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->cp_hqd_pq_wptr_poll_addr_hi);

                /* enable the doorbell if requested; these ASICs also need the
                 * MEC doorbell aperture (KIQ .. MEC ring 7) programmed */
                if (use_doorbell) {
                        if ((adev->asic_type == CHIP_CARRIZO) ||
                            (adev->asic_type == CHIP_FIJI) ||
                            (adev->asic_type == CHIP_STONEY) ||
                            (adev->asic_type == CHIP_POLARIS11) ||
                            (adev->asic_type == CHIP_POLARIS10) ||
                            (adev->asic_type == CHIP_POLARIS12)) {
                                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
                                       AMDGPU_DOORBELL_KIQ << 2);
                                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
                                       AMDGPU_DOORBELL_MEC_RING7 << 2);
                        }
                        tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                            DOORBELL_OFFSET, ring->doorbell_index);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
                        mqd->cp_hqd_pq_doorbell_control = tmp;

                } else {
                        mqd->cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->cp_hqd_pq_doorbell_control);

                /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                ring->wptr = 0;
                mqd->cp_hqd_pq_wptr = ring->wptr;
                WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
                mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->cp_hqd_vmid = 0;
                WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

                tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
                WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
                mqd->cp_hqd_persistent_state = tmp;
                /* enable GENERIC2 interrupt on ME1 pipe3 for these ASICs */
                if (adev->asic_type == CHIP_STONEY ||
                        adev->asic_type == CHIP_POLARIS11 ||
                        adev->asic_type == CHIP_POLARIS10 ||
                        adev->asic_type == CHIP_POLARIS12) {
                        tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
                        tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
                        WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
                }

                /* activate the queue */
                mqd->cp_hqd_active = 1;
                WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                amdgpu_bo_kunmap(ring->mqd_obj);
                amdgpu_bo_unreserve(ring->mqd_obj);
        }

        /* globally enable doorbell processing for the PQ */
        if (use_doorbell) {
                tmp = RREG32(mmCP_PQ_STATUS);
                tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
                WREG32(mmCP_PQ_STATUS, tmp);
        }

        gfx_v8_0_cp_compute_enable(adev, true);

        /* sanity-test each ring; a failure only marks that ring not ready */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

        return 0;
}
5239
5240 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5241 {
5242         int r;
5243
5244         if (!(adev->flags & AMD_IS_APU))
5245                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5246
5247         if (!adev->pp_enabled) {
5248                 if (!adev->firmware.smu_load) {
5249                         /* legacy firmware loading */
5250                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5251                         if (r)
5252                                 return r;
5253
5254                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5255                         if (r)
5256                                 return r;
5257                 } else {
5258                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5259                                                         AMDGPU_UCODE_ID_CP_CE);
5260                         if (r)
5261                                 return -EINVAL;
5262
5263                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5264                                                         AMDGPU_UCODE_ID_CP_PFP);
5265                         if (r)
5266                                 return -EINVAL;
5267
5268                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5269                                                         AMDGPU_UCODE_ID_CP_ME);
5270                         if (r)
5271                                 return -EINVAL;
5272
5273                         if (adev->asic_type == CHIP_TOPAZ) {
5274                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5275                                 if (r)
5276                                         return r;
5277                         } else {
5278                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5279                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5280                                 if (r)
5281                                         return -EINVAL;
5282                         }
5283                 }
5284         }
5285
5286         r = gfx_v8_0_cp_gfx_resume(adev);
5287         if (r)
5288                 return r;
5289
5290         if (amdgpu_sriov_vf(adev))
5291                 r = gfx_v8_0_kiq_resume(adev);
5292         else
5293                 r = gfx_v8_0_cp_compute_resume(adev);
5294         if (r)
5295                 return r;
5296
5297         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5298
5299         return 0;
5300 }
5301
/* Enable or disable both halves of the command processor (gfx and compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
5307
/*
 * gfx_v8_0_hw_init() - hw_init hook for the gfx v8 IP block
 * @handle: amdgpu_device pointer (IP-block handle)
 *
 * Programs golden registers and the static gfx configuration, then
 * starts the RLC before resuming the command processor queues.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        /* the RLC must be running before the CP is resumed */
        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
5324
/*
 * gfx_v8_0_hw_fini() - hw_fini hook for the gfx v8 IP block
 * @handle: amdgpu_device pointer (IP-block handle)
 *
 * Disables the privileged reg/inst interrupts.  Under SR-IOV only the
 * KIQ/MQD buffers are freed (the host controls the engine itself); on
 * bare metal the CP is halted, the RLC stopped, compute resources are
 * released and powergating is set back to ungated.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        if (amdgpu_sriov_vf(adev)) {
                gfx_v8_0_kiq_free_queue(adev);
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_cp_compute_fini(adev);

        /* leave GFX powergating ungated */
        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

        return 0;
}
5345
/* suspend hook: identical to a full hw teardown */
static int gfx_v8_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        return gfx_v8_0_hw_fini(adev);
}
5352
/* resume hook: identical to a full hw init */
static int gfx_v8_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        return gfx_v8_0_hw_init(adev);
}
5359
5360 static bool gfx_v8_0_is_idle(void *handle)
5361 {
5362         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5363
5364         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5365                 return false;
5366         else
5367                 return true;
5368 }
5369
5370 static int gfx_v8_0_wait_for_idle(void *handle)
5371 {
5372         unsigned i;
5373         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5374
5375         for (i = 0; i < adev->usec_timeout; i++) {
5376                 if (gfx_v8_0_is_idle(handle))
5377                         return 0;
5378
5379                 udelay(1);
5380         }
5381         return -ETIMEDOUT;
5382 }
5383
/*
 * gfx_v8_0_check_soft_reset() - determine which gfx soft resets are needed
 * @handle: amdgpu_device pointer (IP-block handle)
 *
 * Inspects the busy bits in GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS
 * and accumulates the matching soft-reset request bits into
 * adev->gfx.grbm_soft_reset and adev->gfx.srbm_soft_reset (or clears
 * them when nothing is busy).
 *
 * Return: true if any reset bit was requested, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS: any busy pipeline stage requires a CP+GFX reset
         * plus a GRBM reset on the SRBM side */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2: RLC busy -> reset the RLC */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* any CP fetcher/compute/gfx frontend busy -> reset all three */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* stash the result for pre_soft_reset/soft_reset/post_soft_reset */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5445
/*
 * gfx_v8_0_inactive_hqd() - drain one compute ring's hardware queue
 * @adev: amdgpu device
 * @ring: compute ring whose HQD should be made inactive
 *
 * Selects the ring's me/pipe/queue via SRBM and, if the HQD is active,
 * issues a dequeue request of type 2 and polls CP_HQD_ACTIVE for up to
 * adev->usec_timeout microseconds.
 *
 * NOTE(review): the SRBM selection here is neither protected by
 * srbm_mutex nor restored to 0 before returning — presumably OK because
 * the caller runs during pre-reset quiesce; verify against
 * gfx_v8_0_pre_soft_reset().
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
                                  struct amdgpu_ring *ring)
{
        int i;

        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                u32 tmp;
                tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
                tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
                                    DEQUEUE_REQ, 2);
                WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
        }
}
5465
5466 static int gfx_v8_0_pre_soft_reset(void *handle)
5467 {
5468         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5469         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5470
5471         if ((!adev->gfx.grbm_soft_reset) &&
5472             (!adev->gfx.srbm_soft_reset))
5473                 return 0;
5474
5475         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5476         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5477
5478         /* stop the rlc */
5479         gfx_v8_0_rlc_stop(adev);
5480
5481         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5482             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5483                 /* Disable GFX parsing/prefetching */
5484                 gfx_v8_0_cp_gfx_enable(adev, false);
5485
5486         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5487             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5488             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5489             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5490                 int i;
5491
5492                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5493                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5494
5495                         gfx_v8_0_inactive_hqd(adev, ring);
5496                 }
5497                 /* Disable MEC parsing/prefetching */
5498                 gfx_v8_0_cp_compute_enable(adev, false);
5499         }
5500
5501        return 0;
5502 }
5503
/* IP-block .soft_reset hook: pulse the GRBM/SRBM soft-reset bits computed
 * earlier by the check_soft_reset stage (adev->gfx.{grbm,srbm}_soft_reset).
 *
 * Sequence: stall and clear the GFX memory controller path, assert the
 * reset bits (write, read back to post, hold ~50us, clear), then release
 * the stall.  The register ordering and delays are part of the hardware
 * programming sequence and must not be reordered.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing flagged for reset */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* at least one mask is non-zero here, so the stall always happens */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to make sure the write has posted */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* read back to make sure the write has posted */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear set above */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5565
/* Reset the HQD backing @ring to a clean state after soft reset: select the
 * ring's me/pipe/queue register bank, clear any pending dequeue request and
 * zero the primary-queue read/write pointers, then restore the default bank.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5575
5576 static int gfx_v8_0_post_soft_reset(void *handle)
5577 {
5578         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5579         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5580
5581         if ((!adev->gfx.grbm_soft_reset) &&
5582             (!adev->gfx.srbm_soft_reset))
5583                 return 0;
5584
5585         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5586         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5587
5588         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5589             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5590                 gfx_v8_0_cp_gfx_resume(adev);
5591
5592         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5593             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5594             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5595             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5596                 int i;
5597
5598                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5599                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5600
5601                         gfx_v8_0_init_hqd(adev, ring);
5602                 }
5603                 gfx_v8_0_cp_compute_resume(adev);
5604         }
5605         gfx_v8_0_rlc_start(adev);
5606
5607         return 0;
5608 }
5609
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* serialize the capture/read sequence against other readers */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	/* latch the counter, then read the two 32-bit halves */
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5629
5630 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5631                                           uint32_t vmid,
5632                                           uint32_t gds_base, uint32_t gds_size,
5633                                           uint32_t gws_base, uint32_t gws_size,
5634                                           uint32_t oa_base, uint32_t oa_size)
5635 {
5636         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5637         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5638
5639         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5640         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5641
5642         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5643         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5644
5645         /* GDS Base */
5646         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5647         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5648                                 WRITE_DATA_DST_SEL(0)));
5649         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5650         amdgpu_ring_write(ring, 0);
5651         amdgpu_ring_write(ring, gds_base);
5652
5653         /* GDS Size */
5654         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5655         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5656                                 WRITE_DATA_DST_SEL(0)));
5657         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5658         amdgpu_ring_write(ring, 0);
5659         amdgpu_ring_write(ring, gds_size);
5660
5661         /* GWS */
5662         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5663         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5664                                 WRITE_DATA_DST_SEL(0)));
5665         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5666         amdgpu_ring_write(ring, 0);
5667         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5668
5669         /* OA */
5670         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5671         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5672                                 WRITE_DATA_DST_SEL(0)));
5673         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5674         amdgpu_ring_write(ring, 0);
5675         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5676 }
5677
5678 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5679 {
5680         WREG32(mmSQ_IND_INDEX,
5681                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5682                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5683                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5684                 (SQ_IND_INDEX__FORCE_READ_MASK));
5685         return RREG32(mmSQ_IND_DATA);
5686 }
5687
5688 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5689                            uint32_t wave, uint32_t thread,
5690                            uint32_t regno, uint32_t num, uint32_t *out)
5691 {
5692         WREG32(mmSQ_IND_INDEX,
5693                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5694                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5695                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5696                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5697                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5698                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5699         while (num--)
5700                 *(out++) = RREG32(mmSQ_IND_DATA);
5701 }
5702
5703 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5704 {
5705         /* type 0 wave data */
5706         dst[(*no_fields)++] = 0;
5707         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5708         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5709         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5710         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5711         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5712         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5713         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5714         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5715         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5716         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5717         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5718         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5719         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5720         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5721         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5722         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5723         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5724         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5725 }
5726
5727 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5728                                      uint32_t wave, uint32_t start,
5729                                      uint32_t size, uint32_t *dst)
5730 {
5731         wave_read_regs(
5732                 adev, simd, wave, 0,
5733                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5734 }
5735
5736
/* GFX callback table installed into adev->gfx.funcs by
 * gfx_v8_0_early_init(): clock-counter snapshot, SE/SH selection and
 * wave-state/SGPR debug dumps.
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5743
/* IP-block .early_init hook: record the ring counts and install the
 * gfx/ring/irq/gds/rlc callback tables before any hardware is touched.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5758
/* IP-block .late_init hook: enable the privileged-register and
 * privileged-instruction interrupts, run the EDC GPR workarounds (which
 * need IBs, hence late init), and request GFX power gating.
 * Returns 0 on success or a negative errno from the failing step.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* NOTE(review): return value intentionally ignored — PG is
	 * best-effort here */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5782
5783 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5784                                                        bool enable)
5785 {
5786         if ((adev->asic_type == CHIP_POLARIS11) ||
5787             (adev->asic_type == CHIP_POLARIS12))
5788                 /* Send msg to SMU via Powerplay */
5789                 amdgpu_set_powergating_state(adev,
5790                                              AMD_IP_BLOCK_TYPE_SMC,
5791                                              enable ?
5792                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5793
5794         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5795 }
5796
/* Enable/disable dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5802
/* Enable/disable "quick" medium-grain power gating (Polaris11/12 only)
 * via RLC_PG_CNTL.
 */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5808
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney path)
 * via RLC_PG_CNTL.
 */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5814
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney path).  When
 * disabling, a dummy register read forces the GFX block awake so the
 * change takes effect.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5824
5825 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5826                                           bool enable)
5827 {
5828         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5829                 cz_enable_gfx_cg_power_gating(adev, true);
5830                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5831                         cz_enable_gfx_pipeline_power_gating(adev, true);
5832         } else {
5833                 cz_enable_gfx_cg_power_gating(adev, false);
5834                 cz_enable_gfx_pipeline_power_gating(adev, false);
5835         }
5836 }
5837
5838 static int gfx_v8_0_set_powergating_state(void *handle,
5839                                           enum amd_powergating_state state)
5840 {
5841         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5842         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5843
5844         switch (adev->asic_type) {
5845         case CHIP_CARRIZO:
5846         case CHIP_STONEY:
5847
5848                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5849                         cz_enable_sck_slow_down_on_power_up(adev, true);
5850                         cz_enable_sck_slow_down_on_power_down(adev, true);
5851                 } else {
5852                         cz_enable_sck_slow_down_on_power_up(adev, false);
5853                         cz_enable_sck_slow_down_on_power_down(adev, false);
5854                 }
5855                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5856                         cz_enable_cp_power_gating(adev, true);
5857                 else
5858                         cz_enable_cp_power_gating(adev, false);
5859
5860                 cz_update_gfx_cg_power_gating(adev, enable);
5861
5862                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5863                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5864                 else
5865                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5866
5867                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5868                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5869                 else
5870                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5871                 break;
5872         case CHIP_POLARIS11:
5873         case CHIP_POLARIS12:
5874                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5875                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5876                 else
5877                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5878
5879                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5880                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5881                 else
5882                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5883
5884                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5885                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5886                 else
5887                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5888                 break;
5889         default:
5890                 break;
5891         }
5892
5893         return 0;
5894 }
5895
/* IP-block .get_clockgating_state hook: OR the AMD_CG_SUPPORT_GFX_* bits
 * of the clock-gating features currently active into *flags, determined by
 * reading the live override/control registers.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* AMD_CG_SUPPORT_GFX_MGCG — active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS — active when the override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5934
/* Broadcast a BPM serdes command: select all SEs/SHs, address every CU and
 * non-CU master, then program RLC_SERDES_WR_CTRL with the command @cmd for
 * BPM register @reg_addr.  Stoney keeps its BPM_DATA/REG_ADDR fields (they
 * are set fresh below); other ASICs clear them first.
 * NOTE(review): leaves the SE/SH selection in broadcast mode — callers
 * appear to rely on that; confirm before changing.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* address all CU and non-CU serdes masters */
	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* install the command, target register and BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5975
/* RLC safe-mode handshake message codes and RLC_GPR_REG2 field layout.
 * NOTE(review): the RLC_GPR_REG2 masks/shifts below are not referenced by
 * the safe-mode helpers visible here (they use mmRLC_SAFE_MODE); confirm
 * they are still needed before removing.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5982
/* Request RLC "safe mode" so clock-gating state can be reprogrammed.
 * No-op when the RLC F32 core is disabled or neither CGCG nor MGCG is
 * enabled in cg_flags.  Sets adev->gfx.rlc.in_safe_mode when the request
 * was issued.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1 -> enter safe mode */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack (CMD bit clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		/* NOTE(review): flag is set even if the waits above timed out */
		adev->gfx.rlc.in_safe_mode = true;
	}
}
6016
/* Release RLC "safe mode" after clock-gating reprogramming.  No-op when
 * the RLC F32 core is disabled.  Clears adev->gfx.rlc.in_safe_mode, then
 * waits for the RLC to ack the exit request (CMD bit clears).
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1, MESSAGE=0 -> exit safe mode */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the ack; runs even if no request was issued above */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
6041
/* RLC safe-mode entry/exit callbacks installed via gfx_v8_0_set_rlc_funcs(). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
6046
/* Enable or disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep features (RLC/CP MGLS, CGTS tree-shade).  The whole
 * numbered sequence runs with the RLC held in safe mode and is strictly
 * ordered: overrides are programmed, the serdes masters are drained, and
 * the BPM serdes command commits the change.  Do not reorder steps.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the overrides so MGCG can
		 * engage (APUs keep the GRBM override set)
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6150
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable/disable CGCG and CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse-grain clock gating, false to disable
 *
 * Programs RLC_CGCG_CGLS_CTRL and the CGCG/CGLS bits of
 * RLC_CGTT_MGCG_OVERRIDE, using BPM serdes commands for the per-CU state.
 * The whole sequence runs inside RLC safe mode and waits for the serdes
 * masters to go idle between steps; registers are only written back when
 * their value actually changed.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	/* cache current control value; written back only if modified below */
	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override bit */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override in the BPM serdes */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls and drop its override bit as well */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* force CGCG/CGLS off via the override bits */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6241 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6242                                             bool enable)
6243 {
6244         if (enable) {
6245                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6246                  * ===  MGCG + MGLS + TS(CG/LS) ===
6247                  */
6248                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6249                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6250         } else {
6251                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6252                  * ===  CGCG + CGLS ===
6253                  */
6254                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6255                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6256         }
6257         return 0;
6258 }
6259
6260 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6261                                           enum amd_clockgating_state state)
6262 {
6263         uint32_t msg_id, pp_state = 0;
6264         uint32_t pp_support_state = 0;
6265         void *pp_handle = adev->powerplay.pp_handle;
6266
6267         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6268                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6269                         pp_support_state = PP_STATE_SUPPORT_LS;
6270                         pp_state = PP_STATE_LS;
6271                 }
6272                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6273                         pp_support_state |= PP_STATE_SUPPORT_CG;
6274                         pp_state |= PP_STATE_CG;
6275                 }
6276                 if (state == AMD_CG_STATE_UNGATE)
6277                         pp_state = 0;
6278
6279                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6280                                 PP_BLOCK_GFX_CG,
6281                                 pp_support_state,
6282                                 pp_state);
6283                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6284         }
6285
6286         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6287                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6288                         pp_support_state = PP_STATE_SUPPORT_LS;
6289                         pp_state = PP_STATE_LS;
6290                 }
6291
6292                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6293                         pp_support_state |= PP_STATE_SUPPORT_CG;
6294                         pp_state |= PP_STATE_CG;
6295                 }
6296
6297                 if (state == AMD_CG_STATE_UNGATE)
6298                         pp_state = 0;
6299
6300                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6301                                 PP_BLOCK_GFX_MG,
6302                                 pp_support_state,
6303                                 pp_state);
6304                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6305         }
6306
6307         return 0;
6308 }
6309
6310 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6311                                           enum amd_clockgating_state state)
6312 {
6313
6314         uint32_t msg_id, pp_state = 0;
6315         uint32_t pp_support_state = 0;
6316         void *pp_handle = adev->powerplay.pp_handle;
6317
6318         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6319                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6320                         pp_support_state = PP_STATE_SUPPORT_LS;
6321                         pp_state = PP_STATE_LS;
6322                 }
6323                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6324                         pp_support_state |= PP_STATE_SUPPORT_CG;
6325                         pp_state |= PP_STATE_CG;
6326                 }
6327                 if (state == AMD_CG_STATE_UNGATE)
6328                         pp_state = 0;
6329
6330                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6331                                 PP_BLOCK_GFX_CG,
6332                                 pp_support_state,
6333                                 pp_state);
6334                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6335         }
6336
6337         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6338                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6339                         pp_support_state = PP_STATE_SUPPORT_LS;
6340                         pp_state = PP_STATE_LS;
6341                 }
6342                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6343                         pp_support_state |= PP_STATE_SUPPORT_CG;
6344                         pp_state |= PP_STATE_CG;
6345                 }
6346                 if (state == AMD_CG_STATE_UNGATE)
6347                         pp_state = 0;
6348
6349                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6350                                 PP_BLOCK_GFX_3D,
6351                                 pp_support_state,
6352                                 pp_state);
6353                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6354         }
6355
6356         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6357                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6358                         pp_support_state = PP_STATE_SUPPORT_LS;
6359                         pp_state = PP_STATE_LS;
6360                 }
6361
6362                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6363                         pp_support_state |= PP_STATE_SUPPORT_CG;
6364                         pp_state |= PP_STATE_CG;
6365                 }
6366
6367                 if (state == AMD_CG_STATE_UNGATE)
6368                         pp_state = 0;
6369
6370                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6371                                 PP_BLOCK_GFX_MG,
6372                                 pp_support_state,
6373                                 pp_state);
6374                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6375         }
6376
6377         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6378                 pp_support_state = PP_STATE_SUPPORT_LS;
6379
6380                 if (state == AMD_CG_STATE_UNGATE)
6381                         pp_state = 0;
6382                 else
6383                         pp_state = PP_STATE_LS;
6384
6385                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6386                                 PP_BLOCK_GFX_RLC,
6387                                 pp_support_state,
6388                                 pp_state);
6389                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6390         }
6391
6392         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6393                 pp_support_state = PP_STATE_SUPPORT_LS;
6394
6395                 if (state == AMD_CG_STATE_UNGATE)
6396                         pp_state = 0;
6397                 else
6398                         pp_state = PP_STATE_LS;
6399                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6400                         PP_BLOCK_GFX_CP,
6401                         pp_support_state,
6402                         pp_state);
6403                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6404         }
6405
6406         return 0;
6407 }
6408
6409 static int gfx_v8_0_set_clockgating_state(void *handle,
6410                                           enum amd_clockgating_state state)
6411 {
6412         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6413
6414         switch (adev->asic_type) {
6415         case CHIP_FIJI:
6416         case CHIP_CARRIZO:
6417         case CHIP_STONEY:
6418                 gfx_v8_0_update_gfx_clock_gating(adev,
6419                                                  state == AMD_CG_STATE_GATE ? true : false);
6420                 break;
6421         case CHIP_TONGA:
6422                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6423                 break;
6424         case CHIP_POLARIS10:
6425         case CHIP_POLARIS11:
6426                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6427                 break;
6428         default:
6429                 break;
6430         }
6431         return 0;
6432 }
6433
6434 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6435 {
6436         return ring->adev->wb.wb[ring->rptr_offs];
6437 }
6438
6439 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6440 {
6441         struct amdgpu_device *adev = ring->adev;
6442
6443         if (ring->use_doorbell)
6444                 /* XXX check if swapping is necessary on BE */
6445                 return ring->adev->wb.wb[ring->wptr_offs];
6446         else
6447                 return RREG32(mmCP_RB0_WPTR);
6448 }
6449
6450 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6451 {
6452         struct amdgpu_device *adev = ring->adev;
6453
6454         if (ring->use_doorbell) {
6455                 /* XXX check if swapping is necessary on BE */
6456                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6457                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6458         } else {
6459                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6460                 (void)RREG32(mmCP_RB0_WPTR);
6461         }
6462 }
6463
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-engine done bit matches.  The bit is
 * selected from the ring type: compute/KIQ rings use the CP2/CP6 bits
 * shifted by pipe (ME1/ME2), the gfx ring uses CP0 and waits on the PFP.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no done bit for other MEs; nothing to emit */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6496
/* Emit a VGT flush: first drain vertex shading with VS_PARTIAL_FLUSH,
 * then emit the VGT_FLUSH event itself.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6507
6508
/* Invalidate HDP by writing 1 to mmHDP_DEBUG0 through a confirmed
 * WRITE_DATA packet on the ME engine.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6520
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 * @ring: gfx ring
 * @ib: indirect buffer to execute
 * @vm_id: VMID the IB runs under (packed into bits 24+ of the control dword)
 * @ctx_switch: unused in this implementation
 *
 * Const IBs (AMDGPU_IB_FLAG_CE) are emitted with INDIRECT_BUFFER_CONST so
 * they execute on the constant engine; all others use INDIRECT_BUFFER.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* swap bytes on big-endian hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6543
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 * @ring: compute ring
 * @ib: indirect buffer to execute
 * @vm_id: VMID the IB runs under (packed into bits 24+ of the control dword)
 * @ctx_switch: unused in this implementation
 *
 * Compute always uses a plain INDIRECT_BUFFER packet with the
 * INDIRECT_BUFFER_VALID bit set in the control dword.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* swap bytes on big-endian hosts */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6559
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: gfx ring
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT additionally raises an interrupt
 *
 * Uses EVENT_WRITE_EOP, which flushes/writes back the TC and TCL1 caches
 * before writing the fence value.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6580
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's fences to signal
 * @ring: ring to emit on
 *
 * Emits a memory-space WAIT_REG_MEM that polls the ring's fence address
 * until it equals the current sync_seq.  Gfx rings wait on the PFP so
 * later prefetched packets see completed work; compute rings use the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6597
/*
 * gfx_v8_0_ring_emit_vm_flush - flush a VMID's TLB from the ring
 * @ring: ring to emit on
 * @vm_id: VMID to flush (0-15)
 * @pd_addr: physical address of the page directory
 *
 * Updates the VMID's page-table base register, triggers the invalidate
 * through VM_INVALIDATE_REQUEST and waits for it to complete.  On gfx
 * rings the PFP is synced to the ME afterwards so prefetched packets
 * cannot use stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 have separate base-address register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6646
6647 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6648 {
6649         return ring->adev->wb.wb[ring->wptr_offs];
6650 }
6651
6652 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6653 {
6654         struct amdgpu_device *adev = ring->adev;
6655
6656         /* XXX check if swapping is necessary on BE */
6657         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6658         WDOORBELL32(ring->doorbell_index, ring->wptr);
6659 }
6660
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: compute ring
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT additionally raises an interrupt
 *
 * Compute rings use RELEASE_MEM instead of EVENT_WRITE_EOP; it likewise
 * flushes/writes back the TC and TCL1 caches before the fence write.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6681
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @ring: KIQ ring
 * @addr: GPU address to write the fence value to (32-bit slot)
 * @seq: fence sequence number (only the low 32 bits are written)
 * @flags: AMDGPU_FENCE_FLAG_INT triggers an interrupt via CPC_INT_STATUS;
 *         AMDGPU_FENCE_FLAG_64BIT is not supported and trips a BUG_ON
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6706
/* Emit a SWITCH_BUFFER packet (header plus one zero payload dword). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6712
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* /
 *         AMDGPU_VM_DOMAIN bits
 *
 * Builds the load-enable dword from @flags and emits CONTEXT_CONTROL.
 * On a context switch a VGT flush is emitted first.  Under SR-IOV, CE
 * and DE metadata init packets are emitted before and after the context
 * control, using the CSA address selected by AMDGPU_VM_DOMAIN.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_de_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}
6750
/*
 * gfx_v8_0_ring_emit_rreg - read a register through the ring
 * @ring: ring to emit on
 * @reg: register offset to read
 *
 * Emits a COPY_DATA packet that copies the register value into the
 * write-back slot at adev->virt.reg_val_offs, from where the caller can
 * read it back (used with the virtualization/KIQ path — see the virt
 * field; NOTE(review): confirm against the callers).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6766
/*
 * gfx_v8_0_ring_emit_wreg - write a register through the ring
 * @ring: ring to emit on
 * @reg: register offset to write
 * @val: value to write
 *
 * Emits a WRITE_DATA packet targeting the register bus without address
 * increment.
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6776
6777 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6778                                                  enum amdgpu_interrupt_state state)
6779 {
6780         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6781                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6782 }
6783
6784 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6785                                                      int me, int pipe,
6786                                                      enum amdgpu_interrupt_state state)
6787 {
6788         /*
6789          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6790          * handles the setting of interrupts for this specific pipe. All other
6791          * pipes' interrupts are set by amdkfd.
6792          */
6793
6794         if (me == 1) {
6795                 switch (pipe) {
6796                 case 0:
6797                         break;
6798                 default:
6799                         DRM_DEBUG("invalid pipe %d\n", pipe);
6800                         return;
6801                 }
6802         } else {
6803                 DRM_DEBUG("invalid me %d\n", me);
6804                 return;
6805         }
6806
6807         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6808                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6809 }
6810
6811 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6812                                              struct amdgpu_irq_src *source,
6813                                              unsigned type,
6814                                              enum amdgpu_interrupt_state state)
6815 {
6816         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6817                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6818
6819         return 0;
6820 }
6821
6822 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6823                                               struct amdgpu_irq_src *source,
6824                                               unsigned type,
6825                                               enum amdgpu_interrupt_state state)
6826 {
6827         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6828                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6829
6830         return 0;
6831 }
6832
/*
 * gfx_v8_0_set_eop_interrupt_state - route an EOP irq request to its pipe
 * @adev: amdgpu device pointer
 * @src: irq source (unused)
 * @type: which ring's EOP interrupt (gfx or one MEC pipe)
 * @state: enable/disable
 *
 * Maps the irq type onto (me, pipe) and forwards to the per-engine
 * helpers.  Unknown types are ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6871
6872 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6873                             struct amdgpu_irq_src *source,
6874                             struct amdgpu_iv_entry *entry)
6875 {
6876         int i;
6877         u8 me_id, pipe_id, queue_id;
6878         struct amdgpu_ring *ring;
6879
6880         DRM_DEBUG("IH: CP EOP\n");
6881         me_id = (entry->ring_id & 0x0c) >> 2;
6882         pipe_id = (entry->ring_id & 0x03) >> 0;
6883         queue_id = (entry->ring_id & 0x70) >> 4;
6884
6885         switch (me_id) {
6886         case 0:
6887                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6888                 break;
6889         case 1:
6890         case 2:
6891                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6892                         ring = &adev->gfx.compute_ring[i];
6893                         /* Per-queue interrupt is supported for MEC starting from VI.
6894                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6895                           */
6896                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6897                                 amdgpu_fence_process(ring);
6898                 }
6899                 break;
6900         }
6901         return 0;
6902 }
6903
/* Privileged-register fault handler: log the violation and schedule a
 * GPU reset; the faulting command stream cannot be recovered in place.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6912
/**
 * gfx_v8_0_priv_inst_irq - privileged instruction fault handler
 *
 * A command stream executed an illegal/privileged instruction; log it and
 * schedule an asynchronous GPU reset.  Returns 0 (entry consumed).
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6921
6922 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6923                                             struct amdgpu_irq_src *src,
6924                                             unsigned int type,
6925                                             enum amdgpu_interrupt_state state)
6926 {
6927         uint32_t tmp, target;
6928         struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
6929
6930         BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6931
6932         if (ring->me == 1)
6933                 target = mmCP_ME1_PIPE0_INT_CNTL;
6934         else
6935                 target = mmCP_ME2_PIPE0_INT_CNTL;
6936         target += ring->pipe;
6937
6938         switch (type) {
6939         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6940                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6941                         tmp = RREG32(mmCPC_INT_CNTL);
6942                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6943                                                  GENERIC2_INT_ENABLE, 0);
6944                         WREG32(mmCPC_INT_CNTL, tmp);
6945
6946                         tmp = RREG32(target);
6947                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6948                                                  GENERIC2_INT_ENABLE, 0);
6949                         WREG32(target, tmp);
6950                 } else {
6951                         tmp = RREG32(mmCPC_INT_CNTL);
6952                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6953                                                  GENERIC2_INT_ENABLE, 1);
6954                         WREG32(mmCPC_INT_CNTL, tmp);
6955
6956                         tmp = RREG32(target);
6957                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6958                                                  GENERIC2_INT_ENABLE, 1);
6959                         WREG32(target, tmp);
6960                 }
6961                 break;
6962         default:
6963                 BUG(); /* kiq only support GENERIC2_INT now */
6964                 break;
6965         }
6966         return 0;
6967 }
6968
/**
 * gfx_v8_0_kiq_irq - KIQ GENERIC2 interrupt handler
 *
 * Decodes me/pipe/queue from the IV entry for debugging, then signals
 * fence completion on the KIQ ring stored in source->data.
 * Returns 0 (entry consumed).
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;

        BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));

        /* ring_id bit layout: [6:4] = queue, [3:2] = me, [1:0] = pipe */
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;
        DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
                   me_id, pipe_id, queue_id);

        amdgpu_fence_process(ring);
        return 0;
}
6987
/* IP-block lifecycle callbacks for the GFX v8 block (init/fini, suspend/
 * resume, idle checks, soft reset, and clock/power gating control).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7008
/* Ring callbacks for the graphics (GFX) ring.  emit_frame_size is the
 * worst-case dword count reserved per frame; each term is annotated with
 * the emit helper it accounts for.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
                2 + /* gfx_v8_ring_emit_sb */
                3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
7040
/* Ring callbacks for the MEC compute rings; compute wptr/fence paths
 * differ from gfx, and there is no switch-buffer/cntxcntl emission.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
};
7068
/* Ring callbacks for the kernel interface queue (KIQ).  Uses the compute
 * rptr/wptr helpers but its own fence emitter, and adds register
 * read/write emission (rreg/wreg) used for KIQ-mediated register access.
 * No pipeline-sync/vm-flush callbacks are wired for KIQ.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7095
7096 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7097 {
7098         int i;
7099
7100         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7101
7102         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7103                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7104
7105         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7106                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7107 }
7108
/* EOP interrupt: per-pipe enable control + fence processing */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};

/* Privileged register access fault -> GPU reset */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};

/* Privileged/illegal instruction fault -> GPU reset */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};

/* KIQ GENERIC2 interrupt */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};
7128
/* Register the GFX interrupt sources (EOP, priv-reg fault, priv-inst
 * fault, KIQ) and their type counts with the device.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7143
/* All GFX v8 parts use the iceland RLC callback table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7148
/* Initialize the ASIC GDS (global data share) partition sizes.  The GDS
 * memory size is read from hardware; the gfx/cs partition splits differ
 * for parts with a 64 KiB GDS versus larger ones.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init ASIC gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        if (adev->gds.mem.total_size == 64 * 1024) {
                /* 64 KiB GDS parts */
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
7176
7177 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7178                                                  u32 bitmap)
7179 {
7180         u32 data;
7181
7182         if (!bitmap)
7183                 return;
7184
7185         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7186         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7187
7188         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7189 }
7190
7191 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7192 {
7193         u32 data, mask;
7194
7195         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7196                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7197
7198         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7199
7200         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7201 }
7202
/**
 * gfx_v8_0_get_cu_info - gather the per-SE/SH compute-unit bitmaps
 *
 * Walks every shader engine / shader array, applies the user CU disable
 * masks, records the active-CU bitmap per SH and counts active CUs.  Up
 * to the first two active CUs in each SH are marked "always on" (ao).
 * Must run under grbm_idx_mutex because it steers the GRBM index.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];

        memset(cu_info, 0, sizeof(*cu_info));

        /* parse the amdgpu.disable_cu module parameter (4 SEs x 2 SHs) */
        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        /* steer subsequent register accesses at SE i / SH j */
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* count active CUs; first two become always-on */
                        for (k = 0; k < 16; k ++) {
                                if (bitmap & mask) {
                                        if (counter < 2)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                }
        }
        /* restore broadcast (all SE/SH) steering */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
}
7245
/* GFX IP block descriptor, version 8.0 */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};

/* GFX IP block descriptor, version 8.1; shares the 8.0 callbacks */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7263
7264 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7265 {
7266         uint64_t ce_payload_addr;
7267         int cnt_ce;
7268         static union {
7269                 struct amdgpu_ce_ib_state regular;
7270                 struct amdgpu_ce_ib_state_chained_ib chained;
7271         } ce_payload = {0};
7272
7273         if (ring->adev->virt.chained_ib_support) {
7274                 ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
7275                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7276         } else {
7277                 ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
7278                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7279         }
7280
7281         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7282         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7283                                 WRITE_DATA_DST_SEL(8) |
7284                                 WR_CONFIRM) |
7285                                 WRITE_DATA_CACHE_POLICY(0));
7286         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7287         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7288         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7289 }
7290
7291 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7292 {
7293         uint64_t de_payload_addr, gds_addr;
7294         int cnt_de;
7295         static union {
7296                 struct amdgpu_de_ib_state regular;
7297                 struct amdgpu_de_ib_state_chained_ib chained;
7298         } de_payload = {0};
7299
7300         gds_addr = csa_addr + 4096;
7301         if (ring->adev->virt.chained_ib_support) {
7302                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7303                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7304                 de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload);
7305                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7306         } else {
7307                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7308                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7309                 de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, de_payload);
7310                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7311         }
7312
7313         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7314         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7315                                 WRITE_DATA_DST_SEL(8) |
7316                                 WR_CONFIRM) |
7317                                 WRITE_DATA_CACHE_POLICY(0));
7318         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7319         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7320         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7321 }