/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/amd-iommu.h>
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"

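/* Bytes reserved per queue for its MQD (memory queue descriptor), padded for alignment */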
#define MQD_SIZE_ALIGNED 768

static const struct kfd_device_info kaveri_device_info = {
        .asic_family = CHIP_KAVERI,
        .max_pasid_bits = 16,
        /* max num of queues for KV. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED
};

static const struct kfd_device_info carrizo_device_info = {
        .asic_family = CHIP_CARRIZO,
        .max_pasid_bits = 16,
        /* max num of queues for CZ. TODO: should be a dynamic value */
        .max_no_of_hqd  = 24,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .num_of_watch_points = 4,
        .mqd_size_aligned = MQD_SIZE_ALIGNED
};

struct kfd_deviceid {
        unsigned short did;
        const struct kfd_device_info *device_info;
};

/* Please keep this sorted by increasing device id. */
static const struct kfd_deviceid supported_devices[] = {
        { 0x1304, &kaveri_device_info },        /* Kaveri */
        { 0x1305, &kaveri_device_info },        /* Kaveri */
        { 0x1306, &kaveri_device_info },        /* Kaveri */
        { 0x1307, &kaveri_device_info },        /* Kaveri */
        { 0x1309, &kaveri_device_info },        /* Kaveri */
        { 0x130A, &kaveri_device_info },        /* Kaveri */
        { 0x130B, &kaveri_device_info },        /* Kaveri */
        { 0x130C, &kaveri_device_info },        /* Kaveri */
        { 0x130D, &kaveri_device_info },        /* Kaveri */
        { 0x130E, &kaveri_device_info },        /* Kaveri */
        { 0x130F, &kaveri_device_info },        /* Kaveri */
        { 0x1310, &kaveri_device_info },        /* Kaveri */
        { 0x1311, &kaveri_device_info },        /* Kaveri */
        { 0x1312, &kaveri_device_info },        /* Kaveri */
        { 0x1313, &kaveri_device_info },        /* Kaveri */
        { 0x1315, &kaveri_device_info },        /* Kaveri */
        { 0x1316, &kaveri_device_info },        /* Kaveri */
        { 0x1317, &kaveri_device_info },        /* Kaveri */
        { 0x1318, &kaveri_device_info },        /* Kaveri */
        { 0x131B, &kaveri_device_info },        /* Kaveri */
        { 0x131C, &kaveri_device_info },        /* Kaveri */
        { 0x131D, &kaveri_device_info },        /* Kaveri */
        { 0x9870, &carrizo_device_info },       /* Carrizo */
        { 0x9874, &carrizo_device_info },       /* Carrizo */
        { 0x9875, &carrizo_device_info },       /* Carrizo */
        { 0x9876, &carrizo_device_info },       /* Carrizo */
        { 0x9877, &carrizo_device_info }        /* Carrizo */
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
                                unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

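/* Map a PCI device id to its kfd_device_info; returns NULL for unsupported devices. */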
static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
        size_t i;

        for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
                if (supported_devices[i].did == did) {
                        BUG_ON(supported_devices[i].device_info == NULL);
                        return supported_devices[i].device_info;
                }
        }

        return NULL;
}

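/*
 * Called by the KGD (graphics) driver to create a kfd_dev for a supported GPU.
 * Returns NULL if the device is not supported or if allocation fails.
 */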
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
        struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
        struct kfd_dev *kfd;

        const struct kfd_device_info *device_info =
                                        lookup_device_info(pdev->device);

        if (!device_info)
                return NULL;

        kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
        if (!kfd)
                return NULL;

        kfd->kgd = kgd;
        kfd->device_info = device_info;
        kfd->pdev = pdev;
        kfd->init_complete = false;
        kfd->kfd2kgd = f2g;

        mutex_init(&kfd->doorbell_mutex);
        memset(&kfd->doorbell_available_index, 0,
                sizeof(kfd->doorbell_available_index));

        return kfd;
}

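/*
 * Check that the IOMMUv2 exposes ATS, PRI and PASID support for this device,
 * derive the PASID limit from the device and doorbell constraints, and bind
 * the device to the IOMMU.
 */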
static bool device_iommu_pasid_init(struct kfd_dev *kfd)
{
        const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
                                        AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
                                        AMD_IOMMU_DEVICE_FLAG_PASID_SUP;

        struct amd_iommu_device_info iommu_info;
        unsigned int pasid_limit;
        int err;

        err = amd_iommu_device_info(kfd->pdev, &iommu_info);
        if (err < 0) {
                dev_err(kfd_device,
                        "error getting iommu info. is the iommu enabled?\n");
                return false;
        }

        if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
                dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n",
                       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
                       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
                       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0);
                return false;
        }

        pasid_limit = min_t(unsigned int,
                        (unsigned int)1 << kfd->device_info->max_pasid_bits,
                        iommu_info.max_pasids);
        /*
         * The last pasid is reserved for kernel queue doorbells;
         * in the future it might be used for a kernel thread.
         */
        pasid_limit = min_t(unsigned int,
                                pasid_limit,
                                kfd->doorbell_process_limit - 1);

        err = amd_iommu_init_device(kfd->pdev, pasid_limit);
        if (err < 0) {
                dev_err(kfd_device, "error initializing iommu device\n");
                return false;
        }

        if (!kfd_set_pasid_limit(pasid_limit)) {
                dev_err(kfd_device, "error setting pasid limit\n");
                amd_iommu_free_device(kfd->pdev);
                return false;
        }

        return true;
}

static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
{
        struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);

        if (dev)
                kfd_unbind_process_from_device(dev, pasid);
}

/*
 * This function is called by the IOMMU driver on PPR failure.
 */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
                unsigned long address, u16 flags)
{
        struct kfd_dev *dev;

        dev_warn(kfd_device,
                        "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
                        pdev->bus->number,
                        PCI_SLOT(pdev->devfn),
                        PCI_FUNC(pdev->devfn),
                        pasid,
                        address,
                        flags);

        dev = kfd_device_by_pci_dev(pdev);
        BUG_ON(dev == NULL);

        kfd_signal_iommu_event(dev, pasid, address,
                        flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);

        return AMD_IOMMU_INV_PRI_RSP_INVALID;
}

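/*
 * Second-stage init, called once KGD has set up the shared resources: size and
 * allocate GTT memory for MQDs, runlists and kernel queues, then bring up
 * doorbells, topology, interrupts, the IOMMU binding and the device queue manager.
 */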
bool kgd2kfd_device_init(struct kfd_dev *kfd,
                         const struct kgd2kfd_shared_resources *gpu_resources)
{
        unsigned int size;

        kfd->shared_resources = *gpu_resources;

        /* calculate max size of mqds needed for queues */
        size = max_num_of_queues_per_device *
                        kfd->device_info->mqd_size_aligned;

        /*
         * calculate max size of runlist packet.
         * There can be only 2 packets at once
         */
        size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
                max_num_of_queues_per_device *
                sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;

        /* Add size of HIQ & DIQ */
        size += KFD_KERNEL_QUEUE_SIZE * 2;

        /* add another 512KB for all other allocations on gart (HPD, fences) */
        size += 512 * 1024;

        if (kfd->kfd2kgd->init_gtt_mem_allocation(
                        kfd->kgd, size, &kfd->gtt_mem,
                        &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
                dev_err(kfd_device,
                        "Could not allocate %d bytes for device (%x:%x)\n",
                        size, kfd->pdev->vendor, kfd->pdev->device);
                goto out;
        }

        dev_info(kfd_device,
                "Allocated %d bytes on gart for device(%x:%x)\n",
                size, kfd->pdev->vendor, kfd->pdev->device);

        /* Initialize GTT sa with 512 byte chunk size */
        if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
                dev_err(kfd_device,
                        "Error initializing gtt sub-allocator\n");
                goto kfd_gtt_sa_init_error;
        }

        kfd_doorbell_init(kfd);

        if (kfd_topology_add_device(kfd) != 0) {
                dev_err(kfd_device,
                        "Error adding device (%x:%x) to topology\n",
                        kfd->pdev->vendor, kfd->pdev->device);
                goto kfd_topology_add_device_error;
        }

        if (kfd_interrupt_init(kfd)) {
                dev_err(kfd_device,
                        "Error initializing interrupts for device (%x:%x)\n",
                        kfd->pdev->vendor, kfd->pdev->device);
                goto kfd_interrupt_error;
        }

        if (!device_iommu_pasid_init(kfd)) {
                dev_err(kfd_device,
                        "Error initializing iommuv2 for device (%x:%x)\n",
                        kfd->pdev->vendor, kfd->pdev->device);
                goto device_iommu_pasid_error;
        }
        amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
                                                iommu_pasid_shutdown_callback);
        amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);

        kfd->dqm = device_queue_manager_init(kfd);
        if (!kfd->dqm) {
                dev_err(kfd_device,
                        "Error initializing queue manager for device (%x:%x)\n",
                        kfd->pdev->vendor, kfd->pdev->device);
                goto device_queue_manager_error;
        }

        if (kfd->dqm->ops.start(kfd->dqm) != 0) {
                dev_err(kfd_device,
                        "Error starting queue manager for device (%x:%x)\n",
                        kfd->pdev->vendor, kfd->pdev->device);
                goto dqm_start_error;
        }

        kfd->dbgmgr = NULL;

        kfd->init_complete = true;
        dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
                 kfd->pdev->device);

        pr_debug("kfd: Starting kfd with the following scheduling policy %d\n",
                sched_policy);

        goto out;

dqm_start_error:
        device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
        amd_iommu_free_device(kfd->pdev);
device_iommu_pasid_error:
        kfd_interrupt_exit(kfd);
kfd_interrupt_error:
        kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
        kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
        kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
        dev_err(kfd_device,
                "device (%x:%x) NOT added due to errors\n",
                kfd->pdev->vendor, kfd->pdev->device);
out:
        return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
        if (kfd->init_complete) {
                device_queue_manager_uninit(kfd->dqm);
                amd_iommu_free_device(kfd->pdev);
                kfd_interrupt_exit(kfd);
                kfd_topology_remove_device(kfd);
                kfd_gtt_sa_fini(kfd);
                kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
        }

        kfree(kfd);
}

void kgd2kfd_suspend(struct kfd_dev *kfd)
{
        BUG_ON(kfd == NULL);

        if (kfd->init_complete) {
                kfd->dqm->ops.stop(kfd->dqm);
                amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
                amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
                amd_iommu_free_device(kfd->pdev);
        }
}

int kgd2kfd_resume(struct kfd_dev *kfd)
{
        unsigned int pasid_limit;
        int err;

        BUG_ON(kfd == NULL);

        pasid_limit = kfd_get_pasid_limit();

        if (kfd->init_complete) {
                err = amd_iommu_init_device(kfd->pdev, pasid_limit);
                if (err < 0)
                        return -ENXIO;
                amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
                                                iommu_pasid_shutdown_callback);
                amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
                kfd->dqm->ops.start(kfd->dqm);
        }

        return 0;
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
        if (!kfd->init_complete)
                return;

        spin_lock(&kfd->interrupt_lock);

        if (kfd->interrupts_active
            && interrupt_is_wanted(kfd, ih_ring_entry)
            && enqueue_ih_ring_entry(kfd, ih_ring_entry))
                schedule_work(&kfd->interrupt_work);

        spin_unlock(&kfd->interrupt_lock);
}

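/*
 * GTT sub-allocator: the GTT buffer is carved into fixed-size chunks tracked by
 * a bitmap; allocations claim runs of contiguous chunks under gtt_sa_lock.
 */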
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
                                unsigned int chunk_size)
{
        unsigned int num_of_bits;

        BUG_ON(!kfd);
        BUG_ON(!kfd->gtt_mem);
        BUG_ON(buf_size < chunk_size);
        BUG_ON(buf_size == 0);
        BUG_ON(chunk_size == 0);

        kfd->gtt_sa_chunk_size = chunk_size;
        kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

        num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE;
        BUG_ON(num_of_bits == 0);

        kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL);

        if (!kfd->gtt_sa_bitmap)
                return -ENOMEM;

        pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
                        kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

        mutex_init(&kfd->gtt_sa_lock);

        return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
        mutex_destroy(&kfd->gtt_sa_lock);
        kfree(kfd->gtt_sa_bitmap);
}

static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
                                                unsigned int bit_num,
                                                unsigned int chunk_size)
{
        return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
                                                unsigned int bit_num,
                                                unsigned int chunk_size)
{
        return (uint32_t *) ((uintptr_t) start_addr + bit_num * chunk_size);
}

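/*
 * First-fit search over the chunk bitmap: find a free chunk, then extend the run
 * until it covers 'size'; if the run is broken by an allocated chunk, restart
 * the search from the point where it broke.
 */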
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
                        struct kfd_mem_obj **mem_obj)
{
        unsigned int found, start_search, cur_size;

        BUG_ON(!kfd);

        if (size == 0)
                return -EINVAL;

        if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
                return -ENOMEM;

        *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
        if ((*mem_obj) == NULL)
                return -ENOMEM;

        pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);

        start_search = 0;

        mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
        /* Find the first chunk that is free */
        found = find_next_zero_bit(kfd->gtt_sa_bitmap,
                                        kfd->gtt_sa_num_of_chunks,
                                        start_search);

        pr_debug("kfd: found = %d\n", found);

        /* If there wasn't any free chunk, bail out */
        if (found == kfd->gtt_sa_num_of_chunks)
                goto kfd_gtt_no_free_chunk;

        /* Update fields of mem_obj */
        (*mem_obj)->range_start = found;
        (*mem_obj)->range_end = found;
        (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
                                        kfd->gtt_start_gpu_addr,
                                        found,
                                        kfd->gtt_sa_chunk_size);
        (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
                                        kfd->gtt_start_cpu_ptr,
                                        found,
                                        kfd->gtt_sa_chunk_size);

        pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
                        (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

        /* If we need only one chunk, mark it as allocated and get out */
        if (size <= kfd->gtt_sa_chunk_size) {
                pr_debug("kfd: single bit\n");
                set_bit(found, kfd->gtt_sa_bitmap);
                goto kfd_gtt_out;
        }

        /* Otherwise, try to see if we have enough contiguous chunks */
        cur_size = size - kfd->gtt_sa_chunk_size;
        do {
                (*mem_obj)->range_end =
                        find_next_zero_bit(kfd->gtt_sa_bitmap,
                                        kfd->gtt_sa_num_of_chunks, ++found);
                /*
                 * If the next free chunk is not contiguous, then we need to
                 * restart our search from the last free chunk we found (which
                 * wasn't contiguous with the previous ones).
                 */
                if ((*mem_obj)->range_end != found) {
                        start_search = found;
                        goto kfd_gtt_restart_search;
                }

                /*
                 * If we reached end of buffer, bail out with error
                 */
                if (found == kfd->gtt_sa_num_of_chunks)
                        goto kfd_gtt_no_free_chunk;

                /* Check if we don't need another chunk */
                if (cur_size <= kfd->gtt_sa_chunk_size)
                        cur_size = 0;
                else
                        cur_size -= kfd->gtt_sa_chunk_size;

        } while (cur_size > 0);

        pr_debug("kfd: range_start = %d, range_end = %d\n",
                (*mem_obj)->range_start, (*mem_obj)->range_end);

        /* Mark the chunks as allocated */
        for (found = (*mem_obj)->range_start;
                found <= (*mem_obj)->range_end;
                found++)
                set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
        mutex_unlock(&kfd->gtt_sa_lock);
        return 0;

kfd_gtt_no_free_chunk:
        pr_debug("kfd: allocation failed with mem_obj = %p\n", *mem_obj);
        mutex_unlock(&kfd->gtt_sa_lock);
        kfree(*mem_obj);
        return -ENOMEM;
}

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
        unsigned int bit;

        BUG_ON(!kfd);

        /* Act like kfree when trying to free a NULL object */
        if (!mem_obj)
                return 0;

        pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
                        mem_obj, mem_obj->range_start, mem_obj->range_end);

        mutex_lock(&kfd->gtt_sa_lock);

        /* Mark the chunks as free */
        for (bit = mem_obj->range_start;
                bit <= mem_obj->range_end;
                bit++)
                clear_bit(bit, kfd->gtt_sa_bitmap);

        mutex_unlock(&kfd->gtt_sa_lock);

        kfree(mem_obj);
        return 0;
}