drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34
35 #include "vega10/soc15ip.h"
36 #include "vega10/VCE/vce_4_0_offset.h"
37 #include "vega10/VCE/vce_4_0_default.h"
38 #include "vega10/VCE/vce_4_0_sh_mask.h"
39 #include "vega10/MMHUB/mmhub_1_0_offset.h"
40 #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41
42 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02
43
44 #define VCE_V4_0_FW_SIZE        (384 * 1024)
45 #define VCE_V4_0_STACK_SIZE     (64 * 1024)
46 #define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47
48 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51
52 /**
53  * vce_v4_0_ring_get_rptr - get read pointer
54  *
55  * @ring: amdgpu_ring pointer
56  *
57  * Returns the current hardware read pointer
58  */
59 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60 {
61         struct amdgpu_device *adev = ring->adev;
62
63         if (ring == &adev->vce.ring[0])
64                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65         else if (ring == &adev->vce.ring[1])
66                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67         else
68                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69 }
70
71 /**
72  * vce_v4_0_ring_get_wptr - get write pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware write pointer
77  */
78 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79 {
80         struct amdgpu_device *adev = ring->adev;
81
82         if (ring->use_doorbell)
83                 return adev->wb.wb[ring->wptr_offs];
84
85         if (ring == &adev->vce.ring[0])
86                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87         else if (ring == &adev->vce.ring[1])
88                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89         else
90                 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91 }
92
93 /**
94  * vce_v4_0_ring_set_wptr - set write pointer
95  *
96  * @ring: amdgpu_ring pointer
97  *
98  * Commits the write pointer to the hardware
99  */
100 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101 {
102         struct amdgpu_device *adev = ring->adev;
103
104         if (ring->use_doorbell) {
105                 /* XXX check if swapping is necessary on BE */
106                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108                 return;
109         }
110
111         if (ring == &adev->vce.ring[0])
112                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113                         lower_32_bits(ring->wptr));
114         else if (ring == &adev->vce.ring[1])
115                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116                         lower_32_bits(ring->wptr));
117         else
118                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119                         lower_32_bits(ring->wptr));
120 }
121
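/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to start up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports the firmware as loaded,
 * soft-resetting the ECPU between retries.  Returns 0 on success or
 * -ETIMEDOUT if the firmware never comes up.
 */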
122 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123 {
124         int i, j;
125
126         for (i = 0; i < 10; ++i) {
127                 for (j = 0; j < 100; ++j) {
128                         uint32_t status =
129                                 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130
131                         if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132                                 return 0;
133                         mdelay(10);
134                 }
135
136                 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137                 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138                                 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139                                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140                 mdelay(10);
141                 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142                                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143                 mdelay(10);
144
145         }
146
147         return -ETIMEDOUT;
148 }
149
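/**
 * vce_v4_0_mmsch_start - hand the init table over to the MM scheduler
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table to be executed by the MMSCH
 *
 * Program the MMSCH VF registers with the table address, VMID and size,
 * kick off initialization and wait for the mailbox response.
 * Returns 0 on success or -EBUSY if the MMSCH does not answer.
 */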
150 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151                                 struct amdgpu_mm_table *table)
152 {
153         uint32_t data = 0, loop;
154         uint64_t addr = table->gpu_addr;
155         struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156         uint32_t size;
157
158         size = header->header_size + header->vce_table_size + header->uvd_table_size;
159
160         /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163
164         /* 2, update vmid of descriptor */
165         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166         data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167         data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169
170         /* 3, notify mmsch about the size of this descriptor */
171         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172
173         /* 4, set resp to zero */
174         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175
176         /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
177         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
178
179         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
180         loop = 1000;
181         while ((data & 0x10000002) != 0x10000002) {
182                 udelay(10);
183                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
184                 loop--;
185                 if (!loop)
186                         break;
187         }
188
189         if (!loop) {
190                 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
191                 return -EBUSY;
192         }
193         WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
194
195         return 0;
196 }
197
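/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table (ring setup plus the MC resume programming)
 * and hand it to the MM scheduler, which applies it on behalf of the
 * virtual function.
 */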
198 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
199 {
200         struct amdgpu_ring *ring;
201         uint32_t offset, size;
202         uint32_t table_size = 0;
203         struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
204         struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
205         struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
206         struct mmsch_v1_0_cmd_end end = { { 0 } };
207         uint32_t *init_table = adev->virt.mm_table.cpu_addr;
208         struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
209
210         direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
211         direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
212         direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
213         end.cmd_header.command_type = MMSCH_COMMAND__END;
214
215         if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
216                 header->version = MMSCH_VERSION;
217                 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
218
219                 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
220                         header->vce_table_offset = header->header_size;
221                 else
222                         header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
223
224                 init_table += header->vce_table_offset;
225
226                 ring = &adev->vce.ring[0];
227                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
228                                             lower_32_bits(ring->gpu_addr));
229                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
230                                             upper_32_bits(ring->gpu_addr));
231                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
232                                             ring->ring_size / 4);
233
234                 /* begin of MC_RESUME */
235                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
236                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
237                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
238                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
239                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
240
241                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
242                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
243                                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
244                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
245                                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
246                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
247                                                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
248                 } else {
249                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
250                                                 adev->vce.gpu_addr >> 8);
251                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
252                                                 adev->vce.gpu_addr >> 8);
253                     MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
254                                                 adev->vce.gpu_addr >> 8);
255                 }
256
257                 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
258                 size = VCE_V4_0_FW_SIZE;
259                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
260                                             offset & 0x7FFFFFFF);
261                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
262
263                 offset += size;
264                 size = VCE_V4_0_STACK_SIZE;
265                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
266                                             offset & 0x7FFFFFFF);
267                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
268
269                 offset += size;
270                 size = VCE_V4_0_DATA_SIZE;
271                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
272                                             offset & 0x7FFFFFFF);
273                 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
274
275                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
276                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
277                                                    0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
278
279                 /* end of MC_RESUME */
280                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
281                                                    VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
282                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
283                                                    ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
284                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
285                                                    ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
286
287                 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
288                                               VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
289                                               VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
290
291                 /* clear BUSY flag */
292                 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
293                                                    ~VCE_STATUS__JOB_BUSY_MASK, 0);
294
295                 /* add end packet */
296                 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
297                 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
298                 header->vce_table_size = table_size;
299
300                 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
301         }
302
303         return -EINVAL; /* already initialized? */
304 }
305
306 /**
307  * vce_v4_0_start - start VCE block
308  *
309  * @adev: amdgpu_device pointer
310  *
311  * Setup and start the VCE block
312  */
313 static int vce_v4_0_start(struct amdgpu_device *adev)
314 {
315         struct amdgpu_ring *ring;
316         int r;
317
318         ring = &adev->vce.ring[0];
319
320         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
321         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
322         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
323         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
324         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
325
326         ring = &adev->vce.ring[1];
327
328         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
329         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
330         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
331         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
332         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
333
334         ring = &adev->vce.ring[2];
335
336         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
337         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
338         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
339         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
340         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
341
342         vce_v4_0_mc_resume(adev);
343         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
344                         ~VCE_STATUS__JOB_BUSY_MASK);
345
346         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
347
348         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
349                         ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
350         mdelay(100);
351
352         r = vce_v4_0_firmware_loaded(adev);
353
354         /* clear BUSY flag */
355         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
356
357         if (r) {
358                 DRM_ERROR("VCE not responding, giving up!!!\n");
359                 return r;
360         }
361
362         return 0;
363 }
364
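/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear the
 * busy flag.
 */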
365 static int vce_v4_0_stop(struct amdgpu_device *adev)
366 {
367
368         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
369
370         /* hold on ECPU */
371         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
372                         VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
373                         ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
374
375         /* clear BUSY flag */
376         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
377
378         /* Set Clock-Gating off */
379         /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
380                 vce_v4_0_set_vce_sw_clock_gating(adev, false);
381         */
382
383         return 0;
384 }
385
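/**
 * vce_v4_0_early_init - early hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Set the number of available rings and hook up the ring and IRQ
 * callbacks.
 */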
386 static int vce_v4_0_early_init(void *handle)
387 {
388         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
389
390         if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
391                 adev->vce.num_rings = 1;
392         else
393                 adev->vce.num_rings = 3;
394
395         vce_v4_0_set_ring_funcs(adev);
396         vce_v4_0_set_irq_funcs(adev);
397
398         return 0;
399 }
400
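/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the VCE interrupt source, allocate the VCPU buffer object,
 * set up the rings and allocate the MM table used under SR-IOV.
 */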
401 static int vce_v4_0_sw_init(void *handle)
402 {
403         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
404         struct amdgpu_ring *ring;
405         unsigned size;
406         int r, i;
407
408         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
409         if (r)
410                 return r;
411
412         size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
413         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
414                 size += VCE_V4_0_FW_SIZE;
415
416         r = amdgpu_vce_sw_init(adev, size);
417         if (r)
418                 return r;
419
420         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
421                 const struct common_firmware_header *hdr;
422                 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
423
424                 adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
425                 if (!adev->vce.saved_bo)
426                         return -ENOMEM;
427
428                 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
429                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
430                 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
431                 adev->firmware.fw_size +=
432                         ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
433                 DRM_INFO("PSP loading VCE firmware\n");
434         } else {
435                 r = amdgpu_vce_resume(adev);
436                 if (r)
437                         return r;
438         }
439
440         for (i = 0; i < adev->vce.num_rings; i++) {
441                 ring = &adev->vce.ring[i];
442                 sprintf(ring->name, "vce%d", i);
443                 if (amdgpu_sriov_vf(adev)) {
444                         /* DOORBELL only works under SRIOV */
445                         ring->use_doorbell = true;
446                         if (i == 0)
447                                 ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
448                         else if (i == 1)
449                                 ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
450                         else
451                                 ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
452                 }
453                 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
454                 if (r)
455                         return r;
456         }
457
458         r = amdgpu_virt_alloc_mm_table(adev);
459         if (r)
460                 return r;
461
462         return r;
463 }
464
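/**
 * vce_v4_0_sw_fini - software teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Free the MM table and the saved firmware image, then tear down the
 * common VCE state.
 */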
465 static int vce_v4_0_sw_fini(void *handle)
466 {
467         int r;
468         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
469
470         /* free MM table */
471         amdgpu_virt_free_mm_table(adev);
472
473         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
474                 kfree(adev->vce.saved_bo);
475                 adev->vce.saved_bo = NULL;
476         }
477
478         r = amdgpu_vce_suspend(adev);
479         if (r)
480                 return r;
481
482         return amdgpu_vce_sw_fini(adev);
483 }
484
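/**
 * vce_v4_0_hw_init - start and test the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Start VCE (through the MMSCH under SR-IOV) and run a ring test on
 * every enabled ring.
 */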
485 static int vce_v4_0_hw_init(void *handle)
486 {
487         int r, i;
488         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
489
490         if (amdgpu_sriov_vf(adev))
491                 r = vce_v4_0_sriov_start(adev);
492         else
493                 r = vce_v4_0_start(adev);
494         if (r)
495                 return r;
496
497         for (i = 0; i < adev->vce.num_rings; i++)
498                 adev->vce.ring[i].ready = false;
499
500         for (i = 0; i < adev->vce.num_rings; i++) {
501                 r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
502                 if (r)
503                         return r;
504                 else
505                         adev->vce.ring[i].ready = true;
506         }
507
508         DRM_INFO("VCE initialized successfully.\n");
509
510         return 0;
511 }
512
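/**
 * vce_v4_0_hw_fini - stop the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop VCE on bare metal (registers must not be touched under SR-IOV)
 * and mark all rings as not ready.
 */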
513 static int vce_v4_0_hw_fini(void *handle)
514 {
515         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
516         int i;
517
518         if (!amdgpu_sriov_vf(adev)) {
519                 /* vce_v4_0_wait_for_idle(handle); */
520                 vce_v4_0_stop(adev);
521         } else {
522                 /* full access mode, so don't touch any VCE register */
523                 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
524         }
525
526         for (i = 0; i < adev->vce.num_rings; i++)
527                 adev->vce.ring[i].ready = false;
528
529         return 0;
530 }
531
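/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Save the VCPU buffer object when the firmware was loaded by the PSP,
 * stop the hardware and suspend the common VCE state.
 */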
532 static int vce_v4_0_suspend(void *handle)
533 {
534         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
535         int r;
536
537         if (adev->vce.vcpu_bo == NULL)
538                 return 0;
539
540         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
541                 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
542                 void *ptr = adev->vce.cpu_addr;
543
544                 memcpy_fromio(adev->vce.saved_bo, ptr, size);
545         }
546
547         r = vce_v4_0_hw_fini(adev);
548         if (r)
549                 return r;
550
551         return amdgpu_vce_suspend(adev);
552 }
553
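/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved VCPU buffer object (PSP load) or reload the firmware
 * image, then reinitialize the hardware.
 */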
554 static int vce_v4_0_resume(void *handle)
555 {
556         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
557         int r;
558
559         if (adev->vce.vcpu_bo == NULL)
560                 return -EINVAL;
561
562         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
563                 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
564                 void *ptr = adev->vce.cpu_addr;
565
566                 memcpy_toio(ptr, adev->vce.saved_bo, size);
567         } else {
568                 r = amdgpu_vce_resume(adev);
569                 if (r)
570                         return r;
571         }
572
573         return vce_v4_0_hw_init(adev);
574 }
575
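/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Let the VCE memory controller know where the firmware, stack and data
 * regions live and enable the system interrupt.
 */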
576 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
577 {
578         uint32_t offset, size;
579
580         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
581         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
582         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
583         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
584
585         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
586         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
587         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
588         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
589         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
590
591         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
592                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
593                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
594                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
595                         (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
596         } else {
597                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
598                         (adev->vce.gpu_addr >> 8));
599                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
600                         (adev->vce.gpu_addr >> 40) & 0xff);
601         }
602
603         offset = AMDGPU_VCE_FIRMWARE_OFFSET;
604         size = VCE_V4_0_FW_SIZE;
605         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
606         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
607
608         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
609         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
610         offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
611         size = VCE_V4_0_STACK_SIZE;
612         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
613         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
614
615         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
616         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
617         offset += size;
618         size = VCE_V4_0_DATA_SIZE;
619         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
620         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
621
622         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
623         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
624                         VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
625                         ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
626 }
627
628 static int vce_v4_0_set_clockgating_state(void *handle,
629                                           enum amd_clockgating_state state)
630 {
631         /* needed for driver unload */
632         return 0;
633 }
634
635 #if 0
636 static bool vce_v4_0_is_idle(void *handle)
637 {
638         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
639         u32 mask = 0;
640
641         mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
642         mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
643
644         return !(RREG32(mmSRBM_STATUS2) & mask);
645 }
646
647 static int vce_v4_0_wait_for_idle(void *handle)
648 {
649         unsigned i;
650         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
651
652         for (i = 0; i < adev->usec_timeout; i++)
653                 if (vce_v4_0_is_idle(handle))
654                         return 0;
655
656         return -ETIMEDOUT;
657 }
658
659 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
660 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
661 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
662 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
663                                       VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
664
665 static bool vce_v4_0_check_soft_reset(void *handle)
666 {
667         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
668         u32 srbm_soft_reset = 0;
669
670         /* According to the VCE team, we should use VCE_STATUS instead of the
671          * SRBM_STATUS.VCE_BUSY bit for busy status checking.
672          * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
673          * instance's registers are accessed
674          * (0 for the 1st instance, 0x10 for the 2nd instance).
675          *
676          * VCE_STATUS
677          * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
678          * |----+----+-----------+----+----+----+----------+---------+----|
679          * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
680          *
681          * The VCE team suggests using bits 3-6 for the busy status check.
682          */
683         mutex_lock(&adev->grbm_idx_mutex);
684         WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
685         if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
686                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
687                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
688         }
689         WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
690         if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
691                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
692                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
693         }
694         WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
695         mutex_unlock(&adev->grbm_idx_mutex);
696
697         if (srbm_soft_reset) {
698                 adev->vce.srbm_soft_reset = srbm_soft_reset;
699                 return true;
700         } else {
701                 adev->vce.srbm_soft_reset = 0;
702                 return false;
703         }
704 }
705
706 static int vce_v4_0_soft_reset(void *handle)
707 {
708         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
709         u32 srbm_soft_reset;
710
711         if (!adev->vce.srbm_soft_reset)
712                 return 0;
713         srbm_soft_reset = adev->vce.srbm_soft_reset;
714
715         if (srbm_soft_reset) {
716                 u32 tmp;
717
718                 tmp = RREG32(mmSRBM_SOFT_RESET);
719                 tmp |= srbm_soft_reset;
720                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
721                 WREG32(mmSRBM_SOFT_RESET, tmp);
722                 tmp = RREG32(mmSRBM_SOFT_RESET);
723
724                 udelay(50);
725
726                 tmp &= ~srbm_soft_reset;
727                 WREG32(mmSRBM_SOFT_RESET, tmp);
728                 tmp = RREG32(mmSRBM_SOFT_RESET);
729
730                 /* Wait a little for things to settle down */
731                 udelay(50);
732         }
733
734         return 0;
735 }
736
737 static int vce_v4_0_pre_soft_reset(void *handle)
738 {
739         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
740
741         if (!adev->vce.srbm_soft_reset)
742                 return 0;
743
744         mdelay(5);
745
746         return vce_v4_0_suspend(adev);
747 }
748
749
750 static int vce_v4_0_post_soft_reset(void *handle)
751 {
752         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
753
754         if (!adev->vce.srbm_soft_reset)
755                 return 0;
756
757         mdelay(5);
758
759         return vce_v4_0_resume(adev);
760 }
761
762 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
763 {
764         u32 tmp, data;
765
766         tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
767         if (override)
768                 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
769         else
770                 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
771
772         if (tmp != data)
773                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
774 }
775
776 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
777                                              bool gated)
778 {
779         u32 data;
780
781         /* Set Override to disable Clock Gating */
782         vce_v4_0_override_vce_clock_gating(adev, true);
783
784         /* This function enables MGCG which is controlled by firmware.
785          * With the clocks in the gated state the core is still
786          * accessible but the firmware will throttle the clocks on the
787          * fly as necessary.
788          */
789         if (gated) {
790                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
791                 data |= 0x1ff;
792                 data &= ~0xef0000;
793                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
794
795                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
796                 data |= 0x3ff000;
797                 data &= ~0xffc00000;
798                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
799
800                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
801                 data |= 0x2;
802                 data &= ~0x00010000;
803                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
804
805                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
806                 data |= 0x37f;
807                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
808
809                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
810                 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
811                         VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
812                         VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
813                         0x8;
814                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
815         } else {
816                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
817                 data &= ~0x80010;
818                 data |= 0xe70008;
819                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
820
821                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
822                 data |= 0xffc00000;
823                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
824
825                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
826                 data |= 0x10000;
827                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
828
829                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
830                 data &= ~0xffc00000;
831                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
832
833                 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
834                 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
835                           VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
836                           VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
837                           0x8);
838                 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
839         }
840         vce_v4_0_override_vce_clock_gating(adev, false);
841 }
842
843 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
844 {
845         u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
846
847         if (enable)
848                 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
849         else
850                 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
851
852         WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
853 }
854
855 static int vce_v4_0_set_clockgating_state(void *handle,
856                                           enum amd_clockgating_state state)
857 {
858         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
859         bool enable = (state == AMD_CG_STATE_GATE);
860         int i;
861
862         if ((adev->asic_type == CHIP_POLARIS10) ||
863                 (adev->asic_type == CHIP_TONGA) ||
864                 (adev->asic_type == CHIP_FIJI))
865                 vce_v4_0_set_bypass_mode(adev, enable);
866
867         if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
868                 return 0;
869
870         mutex_lock(&adev->grbm_idx_mutex);
871         for (i = 0; i < 2; i++) {
872                 /* Program VCE Instance 0 or 1 if not harvested */
873                 if (adev->vce.harvest_config & (1 << i))
874                         continue;
875
876                 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
877
878                 if (enable) {
879                         /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
880                         uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
881                         data &= ~(0xf | 0xff0);
882                         data |= ((0x0 << 0) | (0x04 << 4));
883                         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
884
885                         /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
886                         data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
887                         data &= ~(0xf | 0xff0);
888                         data |= ((0x0 << 0) | (0x04 << 4));
889                         WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
890                 }
891
892                 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
893         }
894
895         WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
896         mutex_unlock(&adev->grbm_idx_mutex);
897
898         return 0;
899 }
900
901 static int vce_v4_0_set_powergating_state(void *handle,
902                                           enum amd_powergating_state state)
903 {
904         /* This doesn't actually powergate the VCE block.
905          * That's done in the dpm code via the SMC.  This
906          * just re-inits the block as necessary.  The actual
907          * gating still happens in the dpm code.  We should
908          * revisit this when there is a cleaner line between
909          * the smc and the hw blocks
910          */
911         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
912
913         if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
914                 return 0;
915
916         if (state == AMD_PG_STATE_GATE)
917                 /* XXX do we need a vce_v4_0_stop()? */
918                 return 0;
919         else
920                 return vce_v4_0_start(adev);
921 }
922 #endif
923
924 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
925                 struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
926 {
927         amdgpu_ring_write(ring, VCE_CMD_IB_VM);
928         amdgpu_ring_write(ring, vm_id);
929         amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
930         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
931         amdgpu_ring_write(ring, ib->length_dw);
932 }
933
934 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
935                         u64 seq, unsigned flags)
936 {
937         WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
938
939         amdgpu_ring_write(ring, VCE_CMD_FENCE);
940         amdgpu_ring_write(ring, addr);
941         amdgpu_ring_write(ring, upper_32_bits(addr));
942         amdgpu_ring_write(ring, seq);
943         amdgpu_ring_write(ring, VCE_CMD_TRAP);
944 }
945
946 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
947 {
948         amdgpu_ring_write(ring, VCE_CMD_END);
949 }
950
951 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
952                          unsigned int vm_id, uint64_t pd_addr)
953 {
954         struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
955         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
956         unsigned eng = ring->vm_inv_eng;
957
958         pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
959         pd_addr |= AMDGPU_PTE_VALID;
960
961         amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
962         amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
963         amdgpu_ring_write(ring, upper_32_bits(pd_addr));
964
965         amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
966         amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
967         amdgpu_ring_write(ring, lower_32_bits(pd_addr));
968
969         amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
970         amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
971         amdgpu_ring_write(ring, 0xffffffff);
972         amdgpu_ring_write(ring, lower_32_bits(pd_addr));
973
974         /* flush TLB */
975         amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
976         amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
977         amdgpu_ring_write(ring, req);
978
979         /* wait for flush */
980         amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
981         amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
982         amdgpu_ring_write(ring, 1 << vm_id);
983         amdgpu_ring_write(ring, 1 << vm_id);
984 }
985
986 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
987                                         struct amdgpu_irq_src *source,
988                                         unsigned type,
989                                         enum amdgpu_interrupt_state state)
990 {
991         uint32_t val = 0;
992
993         if (state == AMDGPU_IRQ_STATE_ENABLE)
994                 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
995
996         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
997                         ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
998         return 0;
999 }
1000
1001 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1002                                       struct amdgpu_irq_src *source,
1003                                       struct amdgpu_iv_entry *entry)
1004 {
1005         DRM_DEBUG("IH: VCE\n");
1006
1007         WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
1008                         VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
1009                         ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
1010
1011         switch (entry->src_data[0]) {
1012         case 0:
1013         case 1:
1014         case 2:
1015                 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1016                 break;
1017         default:
1018                 DRM_ERROR("Unhandled interrupt: %d %d\n",
1019                           entry->src_id, entry->src_data[0]);
1020                 break;
1021         }
1022
1023         return 0;
1024 }
1025
1026 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1027         .name = "vce_v4_0",
1028         .early_init = vce_v4_0_early_init,
1029         .late_init = NULL,
1030         .sw_init = vce_v4_0_sw_init,
1031         .sw_fini = vce_v4_0_sw_fini,
1032         .hw_init = vce_v4_0_hw_init,
1033         .hw_fini = vce_v4_0_hw_fini,
1034         .suspend = vce_v4_0_suspend,
1035         .resume = vce_v4_0_resume,
1036         .is_idle = NULL /* vce_v4_0_is_idle */,
1037         .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1038         .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1039         .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1040         .soft_reset = NULL /* vce_v4_0_soft_reset */,
1041         .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1042         .set_clockgating_state = vce_v4_0_set_clockgating_state,
1043         .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1044 };
1045
1046 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1047         .type = AMDGPU_RING_TYPE_VCE,
1048         .align_mask = 0x3f,
1049         .nop = VCE_CMD_NO_OP,
1050         .support_64bit_ptrs = false,
1051         .vmhub = AMDGPU_MMHUB,
1052         .get_rptr = vce_v4_0_ring_get_rptr,
1053         .get_wptr = vce_v4_0_ring_get_wptr,
1054         .set_wptr = vce_v4_0_ring_set_wptr,
1055         .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1056         .emit_frame_size =
1057                 17 + /* vce_v4_0_emit_vm_flush */
1058                 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1059                 1, /* vce_v4_0_ring_insert_end */
1060         .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1061         .emit_ib = vce_v4_0_ring_emit_ib,
1062         .emit_vm_flush = vce_v4_0_emit_vm_flush,
1063         .emit_fence = vce_v4_0_ring_emit_fence,
1064         .test_ring = amdgpu_vce_ring_test_ring,
1065         .test_ib = amdgpu_vce_ring_test_ib,
1066         .insert_nop = amdgpu_ring_insert_nop,
1067         .insert_end = vce_v4_0_ring_insert_end,
1068         .pad_ib = amdgpu_ring_generic_pad_ib,
1069         .begin_use = amdgpu_vce_ring_begin_use,
1070         .end_use = amdgpu_vce_ring_end_use,
1071 };
1072
1073 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1074 {
1075         int i;
1076
1077         for (i = 0; i < adev->vce.num_rings; i++)
1078                 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1079         DRM_INFO("VCE enabled in VM mode\n");
1080 }
1081
1082 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1083         .set = vce_v4_0_set_interrupt_state,
1084         .process = vce_v4_0_process_interrupt,
1085 };
1086
1087 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1088 {
1089         adev->vce.irq.num_types = 1;
1090         adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1091 }
1092
1093 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1094 {
1095         .type = AMD_IP_BLOCK_TYPE_VCE,
1096         .major = 4,
1097         .minor = 0,
1098         .rev = 0,
1099         .funcs = &vce_v4_0_ip_funcs,
1100 };