1 /*
2  * KVMGT - the implementation of the Intel mediated pass-through framework for KVM
3  *
4  * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Kevin Tian <kevin.tian@intel.com>
27  *    Jike Song <jike.song@intel.com>
28  *    Xiaoguang Chen <xiaoguang.chen@intel.com>
29  */
30
31 #include <linux/init.h>
32 #include <linux/device.h>
33 #include <linux/mm.h>
34 #include <linux/mmu_context.h>
35 #include <linux/types.h>
36 #include <linux/list.h>
37 #include <linux/rbtree.h>
38 #include <linux/spinlock.h>
39 #include <linux/eventfd.h>
40 #include <linux/uuid.h>
41 #include <linux/kvm_host.h>
42 #include <linux/vfio.h>
43 #include <linux/mdev.h>
44
45 #include "i915_drv.h"
46 #include "gvt.h"
47
48 static const struct intel_gvt_ops *intel_gvt_ops;
49
50 /* helper macros copied from vfio-pci */
51 #define VFIO_PCI_OFFSET_SHIFT   40
52 #define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
53 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
54 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
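/*
 * Illustrative example: with the 40-bit shift above, a file offset of
 * ((u64)VFIO_PCI_BAR2_REGION_INDEX << 40) + 0x10 decodes to region index
 * BAR2 via VFIO_PCI_OFFSET_TO_INDEX() and to offset 0x10 within that
 * region via VFIO_PCI_OFFSET_MASK.
 */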
55
56 struct vfio_region {
57         u32                             type;
58         u32                             subtype;
59         size_t                          size;
60         u32                             flags;
61 };
62
63 struct kvmgt_pgfn {
64         gfn_t gfn;
65         struct hlist_node hnode;
66 };
67
68 struct kvmgt_guest_info {
69         struct kvm *kvm;
70         struct intel_vgpu *vgpu;
71         struct kvm_page_track_notifier_node track_node;
72 #define NR_BKT (1 << 18)
73         struct hlist_head ptable[NR_BKT];
74 #undef NR_BKT
75 };
76
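/*
 * A gvt_dma node caches one guest frame number (gfn) to host page frame
 * (pfn) translation obtained by pinning the page through VFIO.  Nodes are
 * kept in the per-vGPU rbtree vgpu->vdev.cache, keyed by gfn.
 */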
77 struct gvt_dma {
78         struct rb_node node;
79         gfn_t gfn;
80         kvm_pfn_t pfn;
81 };
82
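/*
 * vgpu->handle stores a pointer to the kvmgt_guest_info of an initialized
 * guest (see kvmgt_guest_init()); values that fit in the low byte are
 * treated as "no guest attached".
 */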
83 static inline bool handle_valid(unsigned long handle)
84 {
85         return !!(handle & ~0xff);
86 }
87
88 static int kvmgt_guest_init(struct mdev_device *mdev);
89 static void intel_vgpu_release_work(struct work_struct *work);
90 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
91
92 static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
93 {
94         struct rb_node *node = vgpu->vdev.cache.rb_node;
95         struct gvt_dma *ret = NULL;
96
97         while (node) {
98                 struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);
99
100                 if (gfn < itr->gfn)
101                         node = node->rb_left;
102                 else if (gfn > itr->gfn)
103                         node = node->rb_right;
104                 else {
105                         ret = itr;
106                         goto out;
107                 }
108         }
109
110 out:
111         return ret;
112 }
113
114 static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
115 {
116         struct gvt_dma *entry;
117         kvm_pfn_t pfn;
118
119         mutex_lock(&vgpu->vdev.cache_lock);
120
121         entry = __gvt_cache_find(vgpu, gfn);
122         pfn = (entry == NULL) ? 0 : entry->pfn;
123
124         mutex_unlock(&vgpu->vdev.cache_lock);
125         return pfn;
126 }
127
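/*
 * Insert a gfn->pfn translation into the cache.  If the gfn is already
 * present, the newly allocated node is freed and the existing entry kept.
 */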
128 static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
129 {
130         struct gvt_dma *new, *itr;
131         struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;
132
133         new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
134         if (!new)
135                 return;
136
137         new->gfn = gfn;
138         new->pfn = pfn;
139
140         mutex_lock(&vgpu->vdev.cache_lock);
141         while (*link) {
142                 parent = *link;
143                 itr = rb_entry(parent, struct gvt_dma, node);
144
145                 if (gfn == itr->gfn)
146                         goto out;
147                 else if (gfn < itr->gfn)
148                         link = &parent->rb_left;
149                 else
150                         link = &parent->rb_right;
151         }
152
153         rb_link_node(&new->node, parent, link);
154         rb_insert_color(&new->node, &vgpu->vdev.cache);
155         mutex_unlock(&vgpu->vdev.cache_lock);
156         return;
157
158 out:
159         mutex_unlock(&vgpu->vdev.cache_lock);
160         kfree(new);
161 }
162
163 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
164                                 struct gvt_dma *entry)
165 {
166         rb_erase(&entry->node, &vgpu->vdev.cache);
167         kfree(entry);
168 }
169
170 static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
171 {
172         struct device *dev = mdev_dev(vgpu->vdev.mdev);
173         struct gvt_dma *this;
174         unsigned long g1;
175         int rc;
176
177         mutex_lock(&vgpu->vdev.cache_lock);
178         this  = __gvt_cache_find(vgpu, gfn);
179         if (!this) {
180                 mutex_unlock(&vgpu->vdev.cache_lock);
181                 return;
182         }
183
184         g1 = gfn;
185         rc = vfio_unpin_pages(dev, &g1, 1);
186         WARN_ON(rc != 1);
187         __gvt_cache_remove_entry(vgpu, this);
188         mutex_unlock(&vgpu->vdev.cache_lock);
189 }
190
191 static void gvt_cache_init(struct intel_vgpu *vgpu)
192 {
193         vgpu->vdev.cache = RB_ROOT;
194         mutex_init(&vgpu->vdev.cache_lock);
195 }
196
197 static void gvt_cache_destroy(struct intel_vgpu *vgpu)
198 {
199         struct gvt_dma *dma;
200         struct rb_node *node = NULL;
201         struct device *dev = mdev_dev(vgpu->vdev.mdev);
202         unsigned long gfn;
203
204         mutex_lock(&vgpu->vdev.cache_lock);
205         while ((node = rb_first(&vgpu->vdev.cache))) {
206                 dma = rb_entry(node, struct gvt_dma, node);
207                 gfn = dma->gfn;
208
209                 vfio_unpin_pages(dev, &gfn, 1);
210                 __gvt_cache_remove_entry(vgpu, dma);
211         }
212         mutex_unlock(&vgpu->vdev.cache_lock);
213 }
214
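/*
 * The mdev type kobject is presumably named "<parent-driver>-<type-name>",
 * so skip the driver-name prefix plus one separator character before
 * comparing against the GVT vGPU type names.
 */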
215 static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
216                 const char *name)
217 {
218         int i;
219         struct intel_vgpu_type *t;
220         const char *driver_name = dev_driver_string(
221                         &gvt->dev_priv->drm.pdev->dev);
222
223         for (i = 0; i < gvt->num_types; i++) {
224                 t = &gvt->types[i];
225                 if (!strncmp(t->name, name + strlen(driver_name) + 1,
226                         sizeof(t->name)))
227                         return t;
228         }
229
230         return NULL;
231 }
232
233 static ssize_t available_instance_show(struct kobject *kobj, struct device *dev,
234                 char *buf)
235 {
236         struct intel_vgpu_type *type;
237         unsigned int num = 0;
238         void *gvt = kdev_to_i915(dev)->gvt;
239
240         type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
241         if (!type)
242                 num = 0;
243         else
244                 num = type->avail_instance;
245
246         return sprintf(buf, "%u\n", num);
247 }
248
249 static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
250                 char *buf)
251 {
252         return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
253 }
254
255 static ssize_t description_show(struct kobject *kobj, struct device *dev,
256                 char *buf)
257 {
258         struct intel_vgpu_type *type;
259         void *gvt = kdev_to_i915(dev)->gvt;
260
261         type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
262         if (!type)
263                 return 0;
264
265         return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
266                                 "fence: %d\n",
267                                 BYTES_TO_MB(type->low_gm_size),
268                                 BYTES_TO_MB(type->high_gm_size),
269                                 type->fence);
270 }
271
272 static MDEV_TYPE_ATTR_RO(available_instance);
273 static MDEV_TYPE_ATTR_RO(device_api);
274 static MDEV_TYPE_ATTR_RO(description);
275
276 static struct attribute *type_attrs[] = {
277         &mdev_type_attr_available_instance.attr,
278         &mdev_type_attr_device_api.attr,
279         &mdev_type_attr_description.attr,
280         NULL,
281 };
282
283 static struct attribute_group *intel_vgpu_type_groups[] = {
284         [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
285 };
286
287 static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
288 {
289         int i, j;
290         struct intel_vgpu_type *type;
291         struct attribute_group *group;
292
293         for (i = 0; i < gvt->num_types; i++) {
294                 type = &gvt->types[i];
295
296                 group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
297                 if (WARN_ON(!group))
298                         goto unwind;
299
300                 group->name = type->name;
301                 group->attrs = type_attrs;
302                 intel_vgpu_type_groups[i] = group;
303         }
304
305         return true;
306
307 unwind:
308         for (j = 0; j < i; j++) {
309                 group = intel_vgpu_type_groups[j];
310                 kfree(group);
311         }
312
313         return false;
314 }
315
316 static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
317 {
318         int i;
319         struct attribute_group *group;
320
321         for (i = 0; i < gvt->num_types; i++) {
322                 group = intel_vgpu_type_groups[i];
323                 kfree(group);
324         }
325 }
326
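/*
 * The protect table is a hash of guest frame numbers that are currently
 * write-protected through the KVM page-track facility, so that
 * kvmgt_page_track_write() can tell which writes need MMIO emulation.
 */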
327 static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
328 {
329         hash_init(info->ptable);
330 }
331
332 static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
333 {
334         struct kvmgt_pgfn *p;
335         struct hlist_node *tmp;
336         int i;
337
338         hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
339                 hash_del(&p->hnode);
340                 kfree(p);
341         }
342 }
343
344 static struct kvmgt_pgfn *
345 __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
346 {
347         struct kvmgt_pgfn *p, *res = NULL;
348
349         hash_for_each_possible(info->ptable, p, hnode, gfn) {
350                 if (gfn == p->gfn) {
351                         res = p;
352                         break;
353                 }
354         }
355
356         return res;
357 }
358
359 static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
360                                 gfn_t gfn)
361 {
362         struct kvmgt_pgfn *p;
363
364         p = __kvmgt_protect_table_find(info, gfn);
365         return !!p;
366 }
367
368 static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
369 {
370         struct kvmgt_pgfn *p;
371
372         if (kvmgt_gfn_is_write_protected(info, gfn))
373                 return;
374
375         p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
376         if (WARN(!p, "gfn: 0x%llx\n", gfn))
377                 return;
378
379         p->gfn = gfn;
380         hash_add(info->ptable, &p->hnode, gfn);
381 }
382
383 static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
384                                 gfn_t gfn)
385 {
386         struct kvmgt_pgfn *p;
387
388         p = __kvmgt_protect_table_find(info, gfn);
389         if (p) {
390                 hash_del(&p->hnode);
391                 kfree(p);
392         }
393 }
394
395 static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
396 {
397         struct intel_vgpu *vgpu;
398         struct intel_vgpu_type *type;
399         struct device *pdev;
400         void *gvt;
401
402         pdev = mdev_parent_dev(mdev);
403         gvt = kdev_to_i915(pdev)->gvt;
404
405         type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
406         if (!type) {
407                 gvt_err("failed to find type %s to create\n",
408                                                 kobject_name(kobj));
409                 return -EINVAL;
410         }
411
412         vgpu = intel_gvt_ops->vgpu_create(gvt, type);
413         if (IS_ERR_OR_NULL(vgpu)) {
414                 gvt_err("create intel vgpu failed\n");
415                 return -EINVAL;
416         }
417
418         INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
419
420         vgpu->vdev.mdev = mdev;
421         mdev_set_drvdata(mdev, vgpu);
422
423         gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
424                      dev_name(mdev_dev(mdev)));
425         return 0;
426 }
427
428 static int intel_vgpu_remove(struct mdev_device *mdev)
429 {
430         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
431
432         if (handle_valid(vgpu->handle))
433                 return -EBUSY;
434
435         intel_gvt_ops->vgpu_destroy(vgpu);
436         return 0;
437 }
438
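/*
 * VFIO IOMMU notifier: on a DMA unmap, drop (and unpin) every cached
 * gfn->pfn translation covered by the unmapped IOVA range.
 */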
439 static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
440                                      unsigned long action, void *data)
441 {
442         struct intel_vgpu *vgpu = container_of(nb,
443                                         struct intel_vgpu,
444                                         vdev.iommu_notifier);
445
446         if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
447                 struct vfio_iommu_type1_dma_unmap *unmap = data;
448                 unsigned long gfn, end_gfn;
449
450                 gfn = unmap->iova >> PAGE_SHIFT;
451                 end_gfn = gfn + unmap->size / PAGE_SIZE;
452
453                 while (gfn < end_gfn)
454                         gvt_cache_remove(vgpu, gfn++);
455         }
456
457         return NOTIFY_OK;
458 }
459
460 static int intel_vgpu_group_notifier(struct notifier_block *nb,
461                                      unsigned long action, void *data)
462 {
463         struct intel_vgpu *vgpu = container_of(nb,
464                                         struct intel_vgpu,
465                                         vdev.group_notifier);
466
467         /* the only action we care about */
468         if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
469                 vgpu->vdev.kvm = data;
470
471                 if (!data)
472                         schedule_work(&vgpu->vdev.release_work);
473         }
474
475         return NOTIFY_OK;
476 }
477
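/*
 * mdev open: register the IOMMU and group notifiers, then initialize the
 * guest state using the KVM instance delivered by the group notifier.
 * Each error path unwinds the registrations done before it.
 */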
478 static int intel_vgpu_open(struct mdev_device *mdev)
479 {
480         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
481         unsigned long events;
482         int ret;
483
484         vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
485         vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;
486
487         events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
488         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
489                                 &vgpu->vdev.iommu_notifier);
490         if (ret != 0) {
491                 gvt_err("vfio_register_notifier for iommu failed: %d\n", ret);
492                 goto out;
493         }
494
495         events = VFIO_GROUP_NOTIFY_SET_KVM;
496         ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
497                                 &vgpu->vdev.group_notifier);
498         if (ret != 0) {
499                 gvt_err("vfio_register_notifier for group failed: %d\n", ret);
500                 goto undo_iommu;
501         }
502
503         ret = kvmgt_guest_init(mdev);
504         if (ret)
505                 goto undo_group;
506
507         atomic_set(&vgpu->vdev.released, 0);
508         return ret;
509
510 undo_group:
511         vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
512                                         &vgpu->vdev.group_notifier);
513
514 undo_iommu:
515         vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
516                                         &vgpu->vdev.iommu_notifier);
517 out:
518         return ret;
519 }
520
521 static void __intel_vgpu_release(struct intel_vgpu *vgpu)
522 {
523         struct kvmgt_guest_info *info;
524         int ret;
525
526         if (!handle_valid(vgpu->handle))
527                 return;
528
529         if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
530                 return;
531
532         ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
533                                         &vgpu->vdev.iommu_notifier);
534         WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
535
536         ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
537                                         &vgpu->vdev.group_notifier);
538         WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);
539
540         info = (struct kvmgt_guest_info *)vgpu->handle;
541         kvmgt_guest_exit(info);
542
543         vgpu->vdev.kvm = NULL;
544         vgpu->handle = 0;
545 }
546
547 static void intel_vgpu_release(struct mdev_device *mdev)
548 {
549         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
550
551         __intel_vgpu_release(vgpu);
552 }
553
554 static void intel_vgpu_release_work(struct work_struct *work)
555 {
556         struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
557                                         vdev.release_work);
558
559         __intel_vgpu_release(vgpu);
560 }
561
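/*
 * Read the guest-programmed BAR0 base from the vGPU's virtual PCI config
 * space.  For a 64-bit memory BAR the upper 32 bits come from the next
 * BAR dword; other memory types are treated as 32-bit.
 */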
562 static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
563 {
564         u32 start_lo, start_hi;
565         u32 mem_type;
566         int pos = PCI_BASE_ADDRESS_0;
567
568         start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
569                         PCI_BASE_ADDRESS_MEM_MASK;
570         mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
571                         PCI_BASE_ADDRESS_MEM_TYPE_MASK;
572
573         switch (mem_type) {
574         case PCI_BASE_ADDRESS_MEM_TYPE_64:
575                 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
576                                                 + pos + 4));
577                 break;
578         case PCI_BASE_ADDRESS_MEM_TYPE_32:
579         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
580                 /* 1M mem BAR treated as 32-bit BAR */
581         default:
582                 /* unknown mem type treated as 32-bit BAR */
583                 start_hi = 0;
584                 break;
585         }
586
587         return ((u64)start_hi << 32) | start_lo;
588 }
589
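/*
 * Core read/write dispatcher: the VFIO region index encoded in *ppos
 * selects between PCI config space emulation and MMIO (BAR0/BAR1)
 * emulation; other regions are rejected.  On success the full count is
 * reported back to the caller.
 */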
590 static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
591                         size_t count, loff_t *ppos, bool is_write)
592 {
593         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
594         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
595         uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
596         int ret = -EINVAL;
597
599         if (index >= VFIO_PCI_NUM_REGIONS) {
600                 gvt_err("invalid index: %u\n", index);
601                 return -EINVAL;
602         }
603
604         switch (index) {
605         case VFIO_PCI_CONFIG_REGION_INDEX:
606                 if (is_write)
607                         ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
608                                                 buf, count);
609                 else
610                         ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
611                                                 buf, count);
612                 break;
613         case VFIO_PCI_BAR0_REGION_INDEX:
614         case VFIO_PCI_BAR1_REGION_INDEX:
615                 if (is_write) {
616                         uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);
617
618                         ret = intel_gvt_ops->emulate_mmio_write(vgpu,
619                                                 bar0_start + pos, buf, count);
620                 } else {
621                         uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);
622
623                         ret = intel_gvt_ops->emulate_mmio_read(vgpu,
624                                                 bar0_start + pos, buf, count);
625                 }
626                 break;
627         case VFIO_PCI_BAR2_REGION_INDEX:
628         case VFIO_PCI_BAR3_REGION_INDEX:
629         case VFIO_PCI_BAR4_REGION_INDEX:
630         case VFIO_PCI_BAR5_REGION_INDEX:
631         case VFIO_PCI_VGA_REGION_INDEX:
632         case VFIO_PCI_ROM_REGION_INDEX:
633         default:
634                 gvt_err("unsupported region: %u\n", index);
635         }
636
637         return ret == 0 ? count : ret;
638 }
639
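/*
 * Userspace reads are split into naturally aligned 4-, 2- or 1-byte
 * accesses.  Illustrative example: a 6-byte read at a 4-byte-aligned
 * offset is emulated as one 32-bit access followed by one 16-bit access.
 */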
640 static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
641                         size_t count, loff_t *ppos)
642 {
643         unsigned int done = 0;
644         int ret;
645
646         while (count) {
647                 size_t filled;
648
649                 if (count >= 4 && !(*ppos % 4)) {
650                         u32 val;
651
652                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
653                                         ppos, false);
654                         if (ret <= 0)
655                                 goto read_err;
656
657                         if (copy_to_user(buf, &val, sizeof(val)))
658                                 goto read_err;
659
660                         filled = 4;
661                 } else if (count >= 2 && !(*ppos % 2)) {
662                         u16 val;
663
664                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
665                                         ppos, false);
666                         if (ret <= 0)
667                                 goto read_err;
668
669                         if (copy_to_user(buf, &val, sizeof(val)))
670                                 goto read_err;
671
672                         filled = 2;
673                 } else {
674                         u8 val;
675
676                         ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
677                                         false);
678                         if (ret <= 0)
679                                 goto read_err;
680
681                         if (copy_to_user(buf, &val, sizeof(val)))
682                                 goto read_err;
683
684                         filled = 1;
685                 }
686
687                 count -= filled;
688                 done += filled;
689                 *ppos += filled;
690                 buf += filled;
691         }
692
693         return done;
694
695 read_err:
696         return -EFAULT;
697 }
698
699 static ssize_t intel_vgpu_write(struct mdev_device *mdev,
700                                 const char __user *buf,
701                                 size_t count, loff_t *ppos)
702 {
703         unsigned int done = 0;
704         int ret;
705
706         while (count) {
707                 size_t filled;
708
709                 if (count >= 4 && !(*ppos % 4)) {
710                         u32 val;
711
712                         if (copy_from_user(&val, buf, sizeof(val)))
713                                 goto write_err;
714
715                         ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
716                                         ppos, true);
717                         if (ret <= 0)
718                                 goto write_err;
719
720                         filled = 4;
721                 } else if (count >= 2 && !(*ppos % 2)) {
722                         u16 val;
723
724                         if (copy_from_user(&val, buf, sizeof(val)))
725                                 goto write_err;
726
727                         ret = intel_vgpu_rw(mdev, (char *)&val,
728                                         sizeof(val), ppos, true);
729                         if (ret <= 0)
730                                 goto write_err;
731
732                         filled = 2;
733                 } else {
734                         u8 val;
735
736                         if (copy_from_user(&val, buf, sizeof(val)))
737                                 goto write_err;
738
739                         ret = intel_vgpu_rw(mdev, &val, sizeof(val),
740                                         ppos, true);
741                         if (ret <= 0)
742                                 goto write_err;
743
744                         filled = 1;
745                 }
746
747                 count -= filled;
748                 done += filled;
749                 *ppos += filled;
750                 buf += filled;
751         }
752
753         return done;
754 write_err:
755         return -EFAULT;
756 }
757
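/*
 * Only BAR2 (the aperture) may be mmap'ed; the request is backed directly
 * by the vGPU's slice of the physical aperture via remap_pfn_range().
 */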
758 static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
759 {
760         unsigned int index;
761         u64 virtaddr;
762         unsigned long req_size, pgoff = 0;
763         pgprot_t pg_prot;
764         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
765
766         index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
767         if (index >= VFIO_PCI_ROM_REGION_INDEX)
768                 return -EINVAL;
769
770         if (vma->vm_end < vma->vm_start)
771                 return -EINVAL;
772         if ((vma->vm_flags & VM_SHARED) == 0)
773                 return -EINVAL;
774         if (index != VFIO_PCI_BAR2_REGION_INDEX)
775                 return -EINVAL;
776
777         pg_prot = vma->vm_page_prot;
778         virtaddr = vma->vm_start;
779         req_size = vma->vm_end - vma->vm_start;
780         pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;
781
782         return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
783 }
784
785 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
786 {
787         if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
788                 return 1;
789
790         return 0;
791 }
792
793 static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
794                         unsigned int index, unsigned int start,
795                         unsigned int count, uint32_t flags,
796                         void *data)
797 {
798         return 0;
799 }
800
801 static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
802                         unsigned int index, unsigned int start,
803                         unsigned int count, uint32_t flags, void *data)
804 {
805         return 0;
806 }
807
808 static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
809                 unsigned int index, unsigned int start, unsigned int count,
810                 uint32_t flags, void *data)
811 {
812         return 0;
813 }
814
815 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
816                 unsigned int index, unsigned int start, unsigned int count,
817                 uint32_t flags, void *data)
818 {
819         struct eventfd_ctx *trigger;
820
821         if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
822                 int fd = *(int *)data;
823
824                 trigger = eventfd_ctx_fdget(fd);
825                 if (IS_ERR(trigger)) {
826                         gvt_err("eventfd_ctx_fdget failed\n");
827                         return PTR_ERR(trigger);
828                 }
829                 vgpu->vdev.msi_trigger = trigger;
830         }
831
832         return 0;
833 }
834
835 static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
836                 unsigned int index, unsigned int start, unsigned int count,
837                 void *data)
838 {
839         int (*func)(struct intel_vgpu *vgpu, unsigned int index,
840                         unsigned int start, unsigned int count, uint32_t flags,
841                         void *data) = NULL;
842
843         switch (index) {
844         case VFIO_PCI_INTX_IRQ_INDEX:
845                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
846                 case VFIO_IRQ_SET_ACTION_MASK:
847                         func = intel_vgpu_set_intx_mask;
848                         break;
849                 case VFIO_IRQ_SET_ACTION_UNMASK:
850                         func = intel_vgpu_set_intx_unmask;
851                         break;
852                 case VFIO_IRQ_SET_ACTION_TRIGGER:
853                         func = intel_vgpu_set_intx_trigger;
854                         break;
855                 }
856                 break;
857         case VFIO_PCI_MSI_IRQ_INDEX:
858                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
859                 case VFIO_IRQ_SET_ACTION_MASK:
860                 case VFIO_IRQ_SET_ACTION_UNMASK:
861                         /* XXX Need masking support exported */
862                         break;
863                 case VFIO_IRQ_SET_ACTION_TRIGGER:
864                         func = intel_vgpu_set_msi_trigger;
865                         break;
866                 }
867                 break;
868         }
869
870         if (!func)
871                 return -ENOTTY;
872
873         return func(vgpu, index, start, count, flags, data);
874 }
875
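/*
 * VFIO device ioctls: DEVICE_GET_INFO, GET_REGION_INFO (including a
 * sparse-mmap capability for the BAR2 aperture), GET_IRQ_INFO, SET_IRQS
 * and DEVICE_RESET are handled here.
 */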
876 static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
877                              unsigned long arg)
878 {
879         struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
880         unsigned long minsz;
881
882         gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
883
884         if (cmd == VFIO_DEVICE_GET_INFO) {
885                 struct vfio_device_info info;
886
887                 minsz = offsetofend(struct vfio_device_info, num_irqs);
888
889                 if (copy_from_user(&info, (void __user *)arg, minsz))
890                         return -EFAULT;
891
892                 if (info.argsz < minsz)
893                         return -EINVAL;
894
895                 info.flags = VFIO_DEVICE_FLAGS_PCI;
896                 info.flags |= VFIO_DEVICE_FLAGS_RESET;
897                 info.num_regions = VFIO_PCI_NUM_REGIONS;
898                 info.num_irqs = VFIO_PCI_NUM_IRQS;
899
900                 return copy_to_user((void __user *)arg, &info, minsz) ?
901                         -EFAULT : 0;
902
903         } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
904                 struct vfio_region_info info;
905                 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
906                 int i, ret;
907                 struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
908                 size_t size;
909                 int nr_areas = 1;
910                 int cap_type_id;
911
912                 minsz = offsetofend(struct vfio_region_info, offset);
913
914                 if (copy_from_user(&info, (void __user *)arg, minsz))
915                         return -EFAULT;
916
917                 if (info.argsz < minsz)
918                         return -EINVAL;
919
920                 switch (info.index) {
921                 case VFIO_PCI_CONFIG_REGION_INDEX:
922                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
923                         info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
924                         info.flags = VFIO_REGION_INFO_FLAG_READ |
925                                      VFIO_REGION_INFO_FLAG_WRITE;
926                         break;
927                 case VFIO_PCI_BAR0_REGION_INDEX:
928                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
929                         info.size = vgpu->cfg_space.bar[info.index].size;
930                         if (!info.size) {
931                                 info.flags = 0;
932                                 break;
933                         }
934
935                         info.flags = VFIO_REGION_INFO_FLAG_READ |
936                                      VFIO_REGION_INFO_FLAG_WRITE;
937                         break;
938                 case VFIO_PCI_BAR1_REGION_INDEX:
939                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
940                         info.size = 0;
941                         info.flags = 0;
942                         break;
943                 case VFIO_PCI_BAR2_REGION_INDEX:
944                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
945                         info.flags = VFIO_REGION_INFO_FLAG_CAPS |
946                                         VFIO_REGION_INFO_FLAG_MMAP |
947                                         VFIO_REGION_INFO_FLAG_READ |
948                                         VFIO_REGION_INFO_FLAG_WRITE;
949                         info.size = gvt_aperture_sz(vgpu->gvt);
950
951                         size = sizeof(*sparse) +
952                                         (nr_areas * sizeof(*sparse->areas));
953                         sparse = kzalloc(size, GFP_KERNEL);
954                         if (!sparse)
955                                 return -ENOMEM;
956
957                         sparse->nr_areas = nr_areas;
958                         cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
959                         sparse->areas[0].offset =
960                                         PAGE_ALIGN(vgpu_aperture_offset(vgpu));
961                         sparse->areas[0].size = vgpu_aperture_sz(vgpu);
962                         if (!caps.buf) {
963                                 kfree(caps.buf);
964                                 caps.buf = NULL;
965                                 caps.size = 0;
966                         }
967                         break;
968
969                 case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
970                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
971                         info.size = 0;
972
973                         info.flags = 0;
974                         gvt_dbg_core("get region info bar:%d\n", info.index);
975                         break;
976
977                 case VFIO_PCI_ROM_REGION_INDEX:
978                 case VFIO_PCI_VGA_REGION_INDEX:
979                         gvt_dbg_core("get region info index:%d\n", info.index);
980                         break;
981                 default:
982                         {
983                                 struct vfio_region_info_cap_type cap_type;
984
985                                 if (info.index >= VFIO_PCI_NUM_REGIONS +
986                                                 vgpu->vdev.num_regions)
987                                         return -EINVAL;
988
989                                 i = info.index - VFIO_PCI_NUM_REGIONS;
990
991                                 info.offset =
992                                         VFIO_PCI_INDEX_TO_OFFSET(info.index);
993                                 info.size = vgpu->vdev.region[i].size;
994                                 info.flags = vgpu->vdev.region[i].flags;
995
996                                 cap_type.type = vgpu->vdev.region[i].type;
997                                 cap_type.subtype = vgpu->vdev.region[i].subtype;
998
999                                 ret = vfio_info_add_capability(&caps,
1000                                                 VFIO_REGION_INFO_CAP_TYPE,
1001                                                 &cap_type);
1002                                 if (ret)
1003                                         return ret;
1004                         }
1005                 }
1006
1007                 if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1008                         switch (cap_type_id) {
1009                         case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1010                                 ret = vfio_info_add_capability(&caps,
1011                                         VFIO_REGION_INFO_CAP_SPARSE_MMAP,
1012                                         sparse);
1013                                 kfree(sparse);
1014                                 if (ret)
1015                                         return ret;
1016                                 break;
1017                         default:
1018                                 return -EINVAL;
1019                         }
1020                 }
1021
1022                 if (caps.size) {
1023                         if (info.argsz < sizeof(info) + caps.size) {
1024                                 info.argsz = sizeof(info) + caps.size;
1025                                 info.cap_offset = 0;
1026                         } else {
1027                                 vfio_info_cap_shift(&caps, sizeof(info));
1028                                 if (copy_to_user((void __user *)arg +
1029                                                   sizeof(info), caps.buf,
1030                                                   caps.size)) {
1031                                         kfree(caps.buf);
1032                                         return -EFAULT;
1033                                 }
1034                                 info.cap_offset = sizeof(info);
1035                         }
1036
1037                         kfree(caps.buf);
1038                 }
1039
1040                 return copy_to_user((void __user *)arg, &info, minsz) ?
1041                         -EFAULT : 0;
1042         } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1043                 struct vfio_irq_info info;
1044
1045                 minsz = offsetofend(struct vfio_irq_info, count);
1046
1047                 if (copy_from_user(&info, (void __user *)arg, minsz))
1048                         return -EFAULT;
1049
1050                 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1051                         return -EINVAL;
1052
1053                 switch (info.index) {
1054                 case VFIO_PCI_INTX_IRQ_INDEX:
1055                 case VFIO_PCI_MSI_IRQ_INDEX:
1056                         break;
1057                 default:
1058                         return -EINVAL;
1059                 }
1060
1061                 info.flags = VFIO_IRQ_INFO_EVENTFD;
1062
1063                 info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1064
1065                 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1066                         info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1067                                        VFIO_IRQ_INFO_AUTOMASKED);
1068                 else
1069                         info.flags |= VFIO_IRQ_INFO_NORESIZE;
1070
1071                 return copy_to_user((void __user *)arg, &info, minsz) ?
1072                         -EFAULT : 0;
1073         } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1074                 struct vfio_irq_set hdr;
1075                 u8 *data = NULL;
1076                 int ret = 0;
1077                 size_t data_size = 0;
1078
1079                 minsz = offsetofend(struct vfio_irq_set, count);
1080
1081                 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1082                         return -EFAULT;
1083
1084                 if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1085                         int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1086
1087                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1088                                                 VFIO_PCI_NUM_IRQS, &data_size);
1089                         if (ret) {
1090                                 gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
1091                                 return -EINVAL;
1092                         }
1093                         if (data_size) {
1094                                 data = memdup_user((void __user *)(arg + minsz),
1095                                                    data_size);
1096                                 if (IS_ERR(data))
1097                                         return PTR_ERR(data);
1098                         }
1099                 }
1100
1101                 ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1102                                         hdr.start, hdr.count, data);
1103                 kfree(data);
1104
1105                 return ret;
1106         } else if (cmd == VFIO_DEVICE_RESET) {
1107                 intel_gvt_ops->vgpu_reset(vgpu);
1108                 return 0;
1109         }
1110
1111         return 0;
1112 }
1113
1114 static const struct mdev_parent_ops intel_vgpu_ops = {
1115         .supported_type_groups  = intel_vgpu_type_groups,
1116         .create                 = intel_vgpu_create,
1117         .remove                 = intel_vgpu_remove,
1118
1119         .open                   = intel_vgpu_open,
1120         .release                = intel_vgpu_release,
1121
1122         .read                   = intel_vgpu_read,
1123         .write                  = intel_vgpu_write,
1124         .mmap                   = intel_vgpu_mmap,
1125         .ioctl                  = intel_vgpu_ioctl,
1126 };
1127
1128 static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
1129 {
1130         if (!intel_gvt_init_vgpu_type_groups(gvt))
1131                 return -EFAULT;
1132
1133         intel_gvt_ops = ops;
1134
1135         return mdev_register_device(dev, &intel_vgpu_ops);
1136 }
1137
1138 static void kvmgt_host_exit(struct device *dev, void *gvt)
1139 {
1140         intel_gvt_cleanup_vgpu_type_groups(gvt);
1141         mdev_unregister_device(dev);
1142 }
1143
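/*
 * Write-protect (or unprotect) a guest page via the KVM page-track API,
 * keeping the local protect table in sync.  Both paths take kvm->srcu for
 * the memslot lookup and kvm->mmu_lock around the page-track update.
 */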
1144 static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
1145 {
1146         struct kvmgt_guest_info *info;
1147         struct kvm *kvm;
1148         struct kvm_memory_slot *slot;
1149         int idx;
1150
1151         if (!handle_valid(handle))
1152                 return -ESRCH;
1153
1154         info = (struct kvmgt_guest_info *)handle;
1155         kvm = info->kvm;
1156
1157         idx = srcu_read_lock(&kvm->srcu);
1158         slot = gfn_to_memslot(kvm, gfn);
1159         if (!slot) {
1160                 srcu_read_unlock(&kvm->srcu, idx);
1161                 return -EINVAL;
1162         }
1163
1164         spin_lock(&kvm->mmu_lock);
1165
1166         if (kvmgt_gfn_is_write_protected(info, gfn))
1167                 goto out;
1168
1169         kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1170         kvmgt_protect_table_add(info, gfn);
1171
1172 out:
1173         spin_unlock(&kvm->mmu_lock);
1174         srcu_read_unlock(&kvm->srcu, idx);
1175         return 0;
1176 }
1177
1178 static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
1179 {
1180         struct kvmgt_guest_info *info;
1181         struct kvm *kvm;
1182         struct kvm_memory_slot *slot;
1183         int idx;
1184
1185         if (!handle_valid(handle))
1186                 return 0;
1187
1188         info = (struct kvmgt_guest_info *)handle;
1189         kvm = info->kvm;
1190
1191         idx = srcu_read_lock(&kvm->srcu);
1192         slot = gfn_to_memslot(kvm, gfn);
1193         if (!slot) {
1194                 srcu_read_unlock(&kvm->srcu, idx);
1195                 return -EINVAL;
1196         }
1197
1198         spin_lock(&kvm->mmu_lock);
1199
1200         if (!kvmgt_gfn_is_write_protected(info, gfn))
1201                 goto out;
1202
1203         kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1204         kvmgt_protect_table_del(info, gfn);
1205
1206 out:
1207         spin_unlock(&kvm->mmu_lock);
1208         srcu_read_unlock(&kvm->srcu, idx);
1209         return 0;
1210 }
1211
1212 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1213                 const u8 *val, int len,
1214                 struct kvm_page_track_notifier_node *node)
1215 {
1216         struct kvmgt_guest_info *info = container_of(node,
1217                                         struct kvmgt_guest_info, track_node);
1218
1219         if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1220                 intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
1221                                         (void *)val, len);
1222 }
1223
1224 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1225                 struct kvm_memory_slot *slot,
1226                 struct kvm_page_track_notifier_node *node)
1227 {
1228         int i;
1229         gfn_t gfn;
1230         struct kvmgt_guest_info *info = container_of(node,
1231                                         struct kvmgt_guest_info, track_node);
1232
1233         spin_lock(&kvm->mmu_lock);
1234         for (i = 0; i < slot->npages; i++) {
1235                 gfn = slot->base_gfn + i;
1236                 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1237                         kvm_slot_page_track_remove_page(kvm, slot, gfn,
1238                                                 KVM_PAGE_TRACK_WRITE);
1239                         kvmgt_protect_table_del(info, gfn);
1240                 }
1241         }
1242         spin_unlock(&kvm->mmu_lock);
1243 }
1244
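/*
 * Detect whether we ourselves are a KVM guest by reading the hypervisor
 * CPUID signature leaf, which KVM fills with the "KVMKVMKVM" string in
 * ebx/ecx/edx.
 */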
1245 static bool kvmgt_check_guest(void)
1246 {
1247         unsigned int eax, ebx, ecx, edx;
1248         char s[12];
1249         unsigned int *i;
1250
1251         eax = KVM_CPUID_SIGNATURE;
1252         ebx = ecx = edx = 0;
1253
1254         asm volatile ("cpuid"
1255                       : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1256                       :
1257                       : "cc", "memory");
1258         i = (unsigned int *)s;
1259         i[0] = ebx;
1260         i[1] = ecx;
1261         i[2] = edx;
1262
1263         return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
1264 }
1265
1266 /**
1267  * NOTE:
1268  * It's actually impossible to check whether we are running on a KVM host,
1269  * since the "KVM host" is simply the native host. So we only detect the guest here.
1270  */
1271 static int kvmgt_detect_host(void)
1272 {
1273 #ifdef CONFIG_INTEL_IOMMU
1274         if (intel_iommu_gfx_mapped) {
1275                 gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
1276                 return -ENODEV;
1277         }
1278 #endif
1279         return kvmgt_check_guest() ? -ENODEV : 0;
1280 }
1281
1282 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
1283 {
1284         struct intel_vgpu *itr;
1285         struct kvmgt_guest_info *info;
1286         int id;
1287         bool ret = false;
1288
1289         mutex_lock(&vgpu->gvt->lock);
1290         for_each_active_vgpu(vgpu->gvt, itr, id) {
1291                 if (!handle_valid(itr->handle))
1292                         continue;
1293
1294                 info = (struct kvmgt_guest_info *)itr->handle;
1295                 if (kvm && kvm == info->kvm) {
1296                         ret = true;
1297                         goto out;
1298                 }
1299         }
1300 out:
1301         mutex_unlock(&vgpu->gvt->lock);
1302         return ret;
1303 }
1304
1305 static int kvmgt_guest_init(struct mdev_device *mdev)
1306 {
1307         struct kvmgt_guest_info *info;
1308         struct intel_vgpu *vgpu;
1309         struct kvm *kvm;
1310
1311         vgpu = mdev_get_drvdata(mdev);
1312         if (handle_valid(vgpu->handle))
1313                 return -EEXIST;
1314
1315         kvm = vgpu->vdev.kvm;
1316         if (!kvm || kvm->mm != current->mm) {
1317                 gvt_err("KVM is required to use Intel vGPU\n");
1318                 return -ESRCH;
1319         }
1320
1321         if (__kvmgt_vgpu_exist(vgpu, kvm))
1322                 return -EEXIST;
1323
1324         info = vzalloc(sizeof(struct kvmgt_guest_info));
1325         if (!info)
1326                 return -ENOMEM;
1327
1328         vgpu->handle = (unsigned long)info;
1329         info->vgpu = vgpu;
1330         info->kvm = kvm;
1331
1332         kvmgt_protect_table_init(info);
1333         gvt_cache_init(vgpu);
1334
1335         info->track_node.track_write = kvmgt_page_track_write;
1336         info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
1337         kvm_page_track_register_notifier(kvm, &info->track_node);
1338
1339         return 0;
1340 }
1341
1342 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
1343 {
1344         if (!info) {
1345                 gvt_err("kvmgt_guest_info invalid\n");
1346                 return false;
1347         }
1348
1349         kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
1350         kvmgt_protect_table_destroy(info);
1351         gvt_cache_destroy(info->vgpu);
1352         vfree(info);
1353
1354         return true;
1355 }
1356
1357 static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
1358 {
1359         /* nothing to do here */
1360         return 0;
1361 }
1362
1363 static void kvmgt_detach_vgpu(unsigned long handle)
1364 {
1365         /* nothing to do here */
1366 }
1367
1368 static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
1369 {
1370         struct kvmgt_guest_info *info;
1371         struct intel_vgpu *vgpu;
1372
1373         if (!handle_valid(handle))
1374                 return -ESRCH;
1375
1376         info = (struct kvmgt_guest_info *)handle;
1377         vgpu = info->vgpu;
1378
1379         if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
1380                 return 0;
1381
1382         return -EFAULT;
1383 }
1384
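/*
 * Translate a guest frame number to a host pfn: consult the per-vGPU
 * cache first, otherwise pin the page through vfio_pin_pages() and cache
 * the result so later lookups (and the eventual unpin) can find it.
 */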
1385 static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
1386 {
1387         unsigned long pfn;
1388         struct kvmgt_guest_info *info;
1389         struct device *dev;
1390         int rc;
1391
1392         if (!handle_valid(handle))
1393                 return INTEL_GVT_INVALID_ADDR;
1394
1395         info = (struct kvmgt_guest_info *)handle;
1396         pfn = gvt_cache_find(info->vgpu, gfn);
1397         if (pfn != 0)
1398                 return pfn;
1399
1400         pfn = INTEL_GVT_INVALID_ADDR;
1401         dev = mdev_dev(info->vgpu->vdev.mdev);
1402         rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
1403         if (rc != 1) {
1404                 gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc);
1405                 return INTEL_GVT_INVALID_ADDR;
1406         }
1407
1408         gvt_cache_add(info->vgpu, gfn, pfn);
1409         return pfn;
1410 }
1411
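/*
 * Read or write guest physical memory through KVM.  When called from a
 * kernel thread (current->mm == NULL) the guest's mm is temporarily
 * adopted with use_mm() so the kvm_read/write_guest() helpers can work.
 */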
1412 static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
1413                         void *buf, unsigned long len, bool write)
1414 {
1415         struct kvmgt_guest_info *info;
1416         struct kvm *kvm;
1417         int ret;
1418         bool kthread = current->mm == NULL;
1419
1420         if (!handle_valid(handle))
1421                 return -ESRCH;
1422
1423         info = (struct kvmgt_guest_info *)handle;
1424         kvm = info->kvm;
1425
1426         if (kthread)
1427                 use_mm(kvm->mm);
1428
1429         ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
1430                       kvm_read_guest(kvm, gpa, buf, len);
1431
1432         if (kthread)
1433                 unuse_mm(kvm->mm);
1434
1435         return ret;
1436 }
1437
1438 static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
1439                         void *buf, unsigned long len)
1440 {
1441         return kvmgt_rw_gpa(handle, gpa, buf, len, false);
1442 }
1443
1444 static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
1445                         void *buf, unsigned long len)
1446 {
1447         return kvmgt_rw_gpa(handle, gpa, buf, len, true);
1448 }
1449
1450 static unsigned long kvmgt_virt_to_pfn(void *addr)
1451 {
1452         return PFN_DOWN(__pa(addr));
1453 }
1454
1455 struct intel_gvt_mpt kvmgt_mpt = {
1456         .detect_host = kvmgt_detect_host,
1457         .host_init = kvmgt_host_init,
1458         .host_exit = kvmgt_host_exit,
1459         .attach_vgpu = kvmgt_attach_vgpu,
1460         .detach_vgpu = kvmgt_detach_vgpu,
1461         .inject_msi = kvmgt_inject_msi,
1462         .from_virt_to_mfn = kvmgt_virt_to_pfn,
1463         .set_wp_page = kvmgt_write_protect_add,
1464         .unset_wp_page = kvmgt_write_protect_remove,
1465         .read_gpa = kvmgt_read_gpa,
1466         .write_gpa = kvmgt_write_gpa,
1467         .gfn_to_mfn = kvmgt_gfn_to_pfn,
1468 };
1469 EXPORT_SYMBOL_GPL(kvmgt_mpt);
1470
1471 static int __init kvmgt_init(void)
1472 {
1473         return 0;
1474 }
1475
1476 static void __exit kvmgt_exit(void)
1477 {
1478 }
1479
1480 module_init(kvmgt_init);
1481 module_exit(kvmgt_exit);
1482
1483 MODULE_LICENSE("GPL and additional rights");
1484 MODULE_AUTHOR("Intel Corporation");