/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

struct vfio_region {
        u32                             type;
        u32                             subtype;
        size_t                          size;
        u32                             flags;
};

struct kvmgt_pgfn {
        gfn_t gfn;
        struct hlist_node hnode;
};

struct kvmgt_guest_info {
        struct kvm *kvm;
        struct intel_vgpu *vgpu;
        struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
        struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
        struct rb_node node;
        gfn_t gfn;
        kvm_pfn_t pfn;
};

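/*
 * vgpu->handle stores a pointer to the per-guest kvmgt_guest_info (see
 * kvmgt_guest_init()). Any value that fits entirely in the low byte can
 * never be a valid kernel pointer, so it is treated as "no guest attached".
 */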
static inline bool handle_valid(unsigned long handle)
{
        return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);

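/*
 * The gfn->pfn cache is an rbtree keyed by guest frame number. All
 * lookups and updates must hold vgpu->vdev.cache_lock; __gvt_cache_find()
 * is the unlocked core shared by the locked helpers below.
 */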
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct rb_node *node = vgpu->vdev.cache.rb_node;
        struct gvt_dma *ret = NULL;

        while (node) {
                struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

                if (gfn < itr->gfn)
                        node = node->rb_left;
                else if (gfn > itr->gfn)
                        node = node->rb_right;
                else {
                        ret = itr;
                        goto out;
                }
        }

out:
        return ret;
}

static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct gvt_dma *entry;

        mutex_lock(&vgpu->vdev.cache_lock);
        entry = __gvt_cache_find(vgpu, gfn);
        mutex_unlock(&vgpu->vdev.cache_lock);

        return entry == NULL ? 0 : entry->pfn;
}

static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
{
        struct gvt_dma *new, *itr;
        struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

        new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
        if (!new)
                return;

        new->gfn = gfn;
        new->pfn = pfn;

        mutex_lock(&vgpu->vdev.cache_lock);
        while (*link) {
                parent = *link;
                itr = rb_entry(parent, struct gvt_dma, node);

                if (gfn == itr->gfn)
                        goto out;
                else if (gfn < itr->gfn)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }

        rb_link_node(&new->node, parent, link);
        rb_insert_color(&new->node, &vgpu->vdev.cache);
        mutex_unlock(&vgpu->vdev.cache_lock);
        return;

out:
        mutex_unlock(&vgpu->vdev.cache_lock);
        kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
                                struct gvt_dma *entry)
{
        rb_erase(&entry->node, &vgpu->vdev.cache);
        kfree(entry);
}

static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct device *dev = &vgpu->vdev.mdev->dev;
        struct gvt_dma *this;
        unsigned long g1;
        int rc;

        mutex_lock(&vgpu->vdev.cache_lock);
        this = __gvt_cache_find(vgpu, gfn);
        if (!this) {
                mutex_unlock(&vgpu->vdev.cache_lock);
                return;
        }

        g1 = gfn;
        rc = vfio_unpin_pages(dev, &g1, 1);
        WARN_ON(rc != 1);
        __gvt_cache_remove_entry(vgpu, this);
        mutex_unlock(&vgpu->vdev.cache_lock);
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
        vgpu->vdev.cache = RB_ROOT;
        mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
        struct gvt_dma *dma;
        struct rb_node *node = NULL;
        struct device *dev = &vgpu->vdev.mdev->dev;
        unsigned long gfn;

        mutex_lock(&vgpu->vdev.cache_lock);
        while ((node = rb_first(&vgpu->vdev.cache))) {
                dma = rb_entry(node, struct gvt_dma, node);
                gfn = dma->gfn;

                vfio_unpin_pages(dev, &gfn, 1);
                __gvt_cache_remove_entry(vgpu, dma);
        }
        mutex_unlock(&vgpu->vdev.cache_lock);
}

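/*
 * The mdev type kobjects are expected to be named
 * "<parent-driver-name>-<type-name>", so skip past the driver-name
 * prefix (and the '-' separator) before comparing against the GVT
 * type table.
 */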
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
                const char *name)
{
        int i;
        struct intel_vgpu_type *t;
        const char *driver_name = dev_driver_string(
                        &gvt->dev_priv->drm.pdev->dev);

        for (i = 0; i < gvt->num_types; i++) {
                t = &gvt->types[i];
                if (!strncmp(t->name, name + strlen(driver_name) + 1,
                        sizeof(t->name)))
                        return t;
        }

        return NULL;
}

static ssize_t available_instance_show(struct kobject *kobj, struct device *dev,
                char *buf)
{
        struct intel_vgpu_type *type;
        unsigned int num = 0;
        void *gvt = kdev_to_i915(dev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type)
                num = 0;
        else
                num = type->avail_instance;

        return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
                char *buf)
{
        return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct kobject *kobj, struct device *dev,
                char *buf)
{
        struct intel_vgpu_type *type;
        void *gvt = kdev_to_i915(dev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type)
                return 0;

        return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
                                "fence: %d\n",
                                BYTES_TO_MB(type->low_gm_size),
                                BYTES_TO_MB(type->high_gm_size),
                                type->fence);
}

static MDEV_TYPE_ATTR_RO(available_instance);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
        &mdev_type_attr_available_instance.attr,
        &mdev_type_attr_device_api.attr,
        &mdev_type_attr_description.attr,
        NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
        [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
        int i, j;
        struct intel_vgpu_type *type;
        struct attribute_group *group;

        for (i = 0; i < gvt->num_types; i++) {
                type = &gvt->types[i];

                group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
                if (WARN_ON(!group))
                        goto unwind;

                group->name = type->name;
                group->attrs = type_attrs;
                intel_vgpu_type_groups[i] = group;
        }

        return true;

unwind:
        for (j = 0; j < i; j++) {
                group = intel_vgpu_type_groups[j];
                kfree(group);
        }

        return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
        int i;
        struct attribute_group *group;

        for (i = 0; i < gvt->num_types; i++) {
                group = intel_vgpu_type_groups[i];
                kfree(group);
        }
}

static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
        hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
        struct kvmgt_pgfn *p;
        struct hlist_node *tmp;
        int i;

        hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
                hash_del(&p->hnode);
                kfree(p);
        }
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
        struct kvmgt_pgfn *p, *res = NULL;

        hash_for_each_possible(info->ptable, p, hnode, gfn) {
                if (gfn == p->gfn) {
                        res = p;
                        break;
                }
        }

        return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
                                gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        p = __kvmgt_protect_table_find(info, gfn);
        return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        if (kvmgt_gfn_is_write_protected(info, gfn))
                return;

        p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
        if (WARN(!p, "gfn: 0x%llx\n", gfn))
                return;

        p->gfn = gfn;
        hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
                                gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        p = __kvmgt_protect_table_find(info, gfn);
        if (p) {
                hash_del(&p->hnode);
                kfree(p);
        }
}

static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu;
        struct intel_vgpu_type *type;
        struct device *pdev;
        void *gvt;

        pdev = mdev->parent->dev;
        gvt = kdev_to_i915(pdev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type) {
                gvt_err("failed to find type %s to create\n",
                                                kobject_name(kobj));
                return -EINVAL;
        }

        vgpu = intel_gvt_ops->vgpu_create(gvt, type);
        if (IS_ERR_OR_NULL(vgpu)) {
                gvt_err("create intel vgpu failed\n");
                return -EINVAL;
        }

        INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

        vgpu->vdev.mdev = mdev;
        mdev_set_drvdata(mdev, vgpu);

        gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
                     dev_name(&mdev->dev));
        return 0;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        if (handle_valid(vgpu->handle))
                return -EBUSY;

        intel_gvt_ops->vgpu_destroy(vgpu);
        return 0;
}

static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct intel_vgpu *vgpu = container_of(nb,
                                        struct intel_vgpu,
                                        vdev.iommu_notifier);

        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
                struct vfio_iommu_type1_dma_unmap *unmap = data;
                unsigned long gfn, end_gfn;

                gfn = unmap->iova >> PAGE_SHIFT;
                end_gfn = gfn + unmap->size / PAGE_SIZE;

                while (gfn < end_gfn)
                        gvt_cache_remove(vgpu, gfn++);
        }

        return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct intel_vgpu *vgpu = container_of(nb,
                                        struct intel_vgpu,
                                        vdev.group_notifier);

        /* the only action we care about */
        if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
                vgpu->vdev.kvm = data;

                if (!data)
                        schedule_work(&vgpu->vdev.release_work);
        }

        return NOTIFY_OK;
}

static int intel_vgpu_open(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned long events;
        int ret;

        vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
        vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
        ret = vfio_register_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY, &events,
                                &vgpu->vdev.iommu_notifier);
        if (ret != 0) {
                gvt_err("vfio_register_notifier for iommu failed: %d\n", ret);
                goto out;
        }

        events = VFIO_GROUP_NOTIFY_SET_KVM;
        ret = vfio_register_notifier(&mdev->dev, VFIO_GROUP_NOTIFY, &events,
                                &vgpu->vdev.group_notifier);
        if (ret != 0) {
                gvt_err("vfio_register_notifier for group failed: %d\n", ret);
                goto undo_iommu;
        }

        return kvmgt_guest_init(mdev);

undo_iommu:
        vfio_unregister_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY,
                                        &vgpu->vdev.iommu_notifier);
out:
        return ret;
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
        struct kvmgt_guest_info *info;

        if (!handle_valid(vgpu->handle))
                return;

        vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_IOMMU_NOTIFY,
                                        &vgpu->vdev.iommu_notifier);
        vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_GROUP_NOTIFY,
                                        &vgpu->vdev.group_notifier);

        info = (struct kvmgt_guest_info *)vgpu->handle;
        kvmgt_guest_exit(info);
        vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        __intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
        struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
                                        vdev.release_work);
        __intel_vgpu_release(vgpu);
}

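/*
 * Reconstruct the guest-visible BAR0 base from the vGPU's virtual PCI
 * config space. A 64-bit memory BAR carries the upper 32 bits of the
 * address in the following dword; 32-bit, 1M and unknown memory types
 * have no upper half.
 */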
static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
{
        u32 start_lo, start_hi;
        u32 mem_type;
        int pos = PCI_BASE_ADDRESS_0;

        start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
                        PCI_BASE_ADDRESS_MEM_MASK;
        mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
                        PCI_BASE_ADDRESS_MEM_TYPE_MASK;

        switch (mem_type) {
        case PCI_BASE_ADDRESS_MEM_TYPE_64:
                start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
                                                + pos + 4));
                break;
        case PCI_BASE_ADDRESS_MEM_TYPE_32:
        case PCI_BASE_ADDRESS_MEM_TYPE_1M:
                /* 1M mem BAR treated as 32-bit BAR */
        default:
                /* mem unknown type treated as 32-bit BAR */
                start_hi = 0;
                break;
        }

        return ((u64)start_hi << 32) | start_lo;
}

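/*
 * VFIO encodes the target region in the file offset: bits [63:40] hold
 * the region index and bits [39:0] the offset within that region (see
 * the VFIO_PCI_OFFSET_* macros above). For example, an access at offset
 * (2ULL << 40) + 0x100 targets byte 0x100 of BAR2.
 */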
static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
                        size_t count, loff_t *ppos, bool is_write)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
        uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
        int ret = -EINVAL;

        if (index >= VFIO_PCI_NUM_REGIONS) {
                gvt_err("invalid index: %u\n", index);
                return -EINVAL;
        }

        switch (index) {
        case VFIO_PCI_CONFIG_REGION_INDEX:
                if (is_write)
                        ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
                                                buf, count);
                else
                        ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
                                                buf, count);
                break;
        case VFIO_PCI_BAR0_REGION_INDEX:
        case VFIO_PCI_BAR1_REGION_INDEX:
                if (is_write) {
                        uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

                        ret = intel_gvt_ops->emulate_mmio_write(vgpu,
                                                bar0_start + pos, buf, count);
                } else {
                        uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

                        ret = intel_gvt_ops->emulate_mmio_read(vgpu,
                                                bar0_start + pos, buf, count);
                }
                break;
        case VFIO_PCI_BAR2_REGION_INDEX:
        case VFIO_PCI_BAR3_REGION_INDEX:
        case VFIO_PCI_BAR4_REGION_INDEX:
        case VFIO_PCI_BAR5_REGION_INDEX:
        case VFIO_PCI_VGA_REGION_INDEX:
        case VFIO_PCI_ROM_REGION_INDEX:
        default:
                gvt_err("unsupported region: %u\n", index);
        }

        return ret == 0 ? count : ret;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
                        size_t count, loff_t *ppos)
{
        unsigned int done = 0;
        int ret;

        while (count) {
                size_t filled;

                if (count >= 4 && !(*ppos % 4)) {
                        u32 val;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 4;
                } else if (count >= 2 && !(*ppos % 2)) {
                        u16 val;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 2;
                } else {
                        u8 val;

                        ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
                                        false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 1;
                }

                count -= filled;
                done += filled;
                *ppos += filled;
                buf += filled;
        }

        return done;

read_err:
        return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
                                const char __user *buf,
                                size_t count, loff_t *ppos)
{
        unsigned int done = 0;
        int ret;

        while (count) {
                size_t filled;

                if (count >= 4 && !(*ppos % 4)) {
                        u32 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 4;
                } else if (count >= 2 && !(*ppos % 2)) {
                        u16 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, (char *)&val,
                                        sizeof(val), ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 2;
                } else {
                        u8 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, &val, sizeof(val),
                                        ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 1;
                }

                count -= filled;
                done += filled;
                *ppos += filled;
                buf += filled;
        }

        return done;
write_err:
        return -EFAULT;
}

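/*
 * Only BAR2 (the GM aperture) supports mmap; all other regions must go
 * through read/write emulation. The region index is recovered from
 * vma->vm_pgoff using the same 40-bit offset encoding as intel_vgpu_rw().
 */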
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
        unsigned int index;
        u64 virtaddr;
        unsigned long req_size, pgoff = 0;
        pgprot_t pg_prot;
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
        if (index >= VFIO_PCI_ROM_REGION_INDEX)
                return -EINVAL;

        if (vma->vm_end < vma->vm_start)
                return -EINVAL;
        if ((vma->vm_flags & VM_SHARED) == 0)
                return -EINVAL;
        if (index != VFIO_PCI_BAR2_REGION_INDEX)
                return -EINVAL;

        pg_prot = vma->vm_page_prot;
        virtaddr = vma->vm_start;
        req_size = vma->vm_end - vma->vm_start;
        pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

        return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
        if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
                return 1;

        return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
                        unsigned int index, unsigned int start,
                        unsigned int count, uint32_t flags,
                        void *data)
{
        return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
                        unsigned int index, unsigned int start,
                        unsigned int count, uint32_t flags, void *data)
{
        return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
                unsigned int index, unsigned int start, unsigned int count,
                uint32_t flags, void *data)
{
        return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
                unsigned int index, unsigned int start, unsigned int count,
                uint32_t flags, void *data)
{
        struct eventfd_ctx *trigger;

        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                int fd = *(int *)data;

                trigger = eventfd_ctx_fdget(fd);
                if (IS_ERR(trigger)) {
                        gvt_err("eventfd_ctx_fdget failed\n");
                        return PTR_ERR(trigger);
                }
                vgpu->vdev.msi_trigger = trigger;
        }

        return 0;
}

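/*
 * Dispatch a VFIO_DEVICE_SET_IRQS request to the handler matching the
 * (index, action) pair. Unsupported combinations leave func NULL and
 * return -ENOTTY.
 */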
static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
                unsigned int index, unsigned int start, unsigned int count,
                void *data)
{
        int (*func)(struct intel_vgpu *vgpu, unsigned int index,
                        unsigned int start, unsigned int count, uint32_t flags,
                        void *data) = NULL;

        switch (index) {
        case VFIO_PCI_INTX_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                        func = intel_vgpu_set_intx_mask;
                        break;
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        func = intel_vgpu_set_intx_unmask;
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = intel_vgpu_set_intx_trigger;
                        break;
                }
                break;
        case VFIO_PCI_MSI_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        /* XXX Need masking support exported */
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = intel_vgpu_set_msi_trigger;
                        break;
                }
                break;
        }

        if (!func)
                return -ENOTTY;

        return func(vgpu, index, start, count, flags, data);
}

static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                             unsigned long arg)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned long minsz;

        gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

        if (cmd == VFIO_DEVICE_GET_INFO) {
                struct vfio_device_info info;

                minsz = offsetofend(struct vfio_device_info, num_irqs);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.flags = VFIO_DEVICE_FLAGS_PCI;
                info.flags |= VFIO_DEVICE_FLAGS_RESET;
                info.num_regions = VFIO_PCI_NUM_REGIONS;
                info.num_irqs = VFIO_PCI_NUM_IRQS;

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;

        } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
                struct vfio_region_info info;
                struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
                int i, ret;
                struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
                size_t size;
                int nr_areas = 1;
                int cap_type_id;

                minsz = offsetofend(struct vfio_region_info, offset);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                switch (info.index) {
                case VFIO_PCI_CONFIG_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
                        info.flags = VFIO_REGION_INFO_FLAG_READ |
                                     VFIO_REGION_INFO_FLAG_WRITE;
                        break;
                case VFIO_PCI_BAR0_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = vgpu->cfg_space.bar[info.index].size;
                        if (!info.size) {
                                info.flags = 0;
                                break;
                        }

                        info.flags = VFIO_REGION_INFO_FLAG_READ |
                                     VFIO_REGION_INFO_FLAG_WRITE;
                        break;
                case VFIO_PCI_BAR1_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = 0;
                        info.flags = 0;
                        break;
                case VFIO_PCI_BAR2_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.flags = VFIO_REGION_INFO_FLAG_CAPS |
                                        VFIO_REGION_INFO_FLAG_MMAP |
                                        VFIO_REGION_INFO_FLAG_READ |
                                        VFIO_REGION_INFO_FLAG_WRITE;
                        info.size = gvt_aperture_sz(vgpu->gvt);

                        size = sizeof(*sparse) +
                                        (nr_areas * sizeof(*sparse->areas));
                        sparse = kzalloc(size, GFP_KERNEL);
                        if (!sparse)
                                return -ENOMEM;

                        sparse->nr_areas = nr_areas;
                        cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
                        sparse->areas[0].offset =
                                        PAGE_ALIGN(vgpu_aperture_offset(vgpu));
                        sparse->areas[0].size = vgpu_aperture_sz(vgpu);
                        break;

                case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = 0;

                        info.flags = 0;
                        gvt_dbg_core("get region info bar:%d\n", info.index);
                        break;

                case VFIO_PCI_ROM_REGION_INDEX:
                case VFIO_PCI_VGA_REGION_INDEX:
                        gvt_dbg_core("get region info index:%d\n", info.index);
                        break;
                default:
                        {
                                struct vfio_region_info_cap_type cap_type;

                                if (info.index >= VFIO_PCI_NUM_REGIONS +
                                                vgpu->vdev.num_regions)
                                        return -EINVAL;

                                i = info.index - VFIO_PCI_NUM_REGIONS;

                                info.offset =
                                        VFIO_PCI_INDEX_TO_OFFSET(info.index);
                                info.size = vgpu->vdev.region[i].size;
                                info.flags = vgpu->vdev.region[i].flags;

                                cap_type.type = vgpu->vdev.region[i].type;
                                cap_type.subtype = vgpu->vdev.region[i].subtype;

                                ret = vfio_info_add_capability(&caps,
                                                VFIO_REGION_INFO_CAP_TYPE,
                                                &cap_type);
                                if (ret)
                                        return ret;
                        }
                }

                if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
                        switch (cap_type_id) {
                        case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
                                ret = vfio_info_add_capability(&caps,
                                        VFIO_REGION_INFO_CAP_SPARSE_MMAP,
                                        sparse);
                                kfree(sparse);
                                if (ret)
                                        return ret;
                                break;
                        default:
                                return -EINVAL;
                        }
                }

                if (caps.size) {
                        if (info.argsz < sizeof(info) + caps.size) {
                                info.argsz = sizeof(info) + caps.size;
                                info.cap_offset = 0;
                        } else {
                                vfio_info_cap_shift(&caps, sizeof(info));
                                if (copy_to_user((void __user *)arg +
                                                  sizeof(info), caps.buf,
                                                  caps.size)) {
                                        kfree(caps.buf);
                                        return -EFAULT;
                                }
                                info.cap_offset = sizeof(info);
                        }

                        kfree(caps.buf);
                }

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
        } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
                struct vfio_irq_info info;

                minsz = offsetofend(struct vfio_irq_info, count);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
                        return -EINVAL;

                switch (info.index) {
                case VFIO_PCI_INTX_IRQ_INDEX:
                case VFIO_PCI_MSI_IRQ_INDEX:
                        break;
                default:
                        return -EINVAL;
                }

                info.flags = VFIO_IRQ_INFO_EVENTFD;

                info.count = intel_vgpu_get_irq_count(vgpu, info.index);

                if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
                        info.flags |= (VFIO_IRQ_INFO_MASKABLE |
                                       VFIO_IRQ_INFO_AUTOMASKED);
                else
                        info.flags |= VFIO_IRQ_INFO_NORESIZE;

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
        } else if (cmd == VFIO_DEVICE_SET_IRQS) {
                struct vfio_irq_set hdr;
                u8 *data = NULL;
                int ret = 0;
                size_t data_size = 0;

                minsz = offsetofend(struct vfio_irq_set, count);

                if (copy_from_user(&hdr, (void __user *)arg, minsz))
                        return -EFAULT;

                if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
                        int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

                        ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
                                                VFIO_PCI_NUM_IRQS, &data_size);
                        if (ret) {
                                gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
                                return -EINVAL;
                        }
                        if (data_size) {
                                data = memdup_user((void __user *)(arg + minsz),
                                                   data_size);
                                if (IS_ERR(data))
                                        return PTR_ERR(data);
                        }
                }

                ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
                                        hdr.start, hdr.count, data);
                kfree(data);

                return ret;
        } else if (cmd == VFIO_DEVICE_RESET) {
                intel_gvt_ops->vgpu_reset(vgpu);
                return 0;
        }

        return 0;
}

static const struct parent_ops intel_vgpu_ops = {
        .supported_type_groups  = intel_vgpu_type_groups,
        .create                 = intel_vgpu_create,
        .remove                 = intel_vgpu_remove,

        .open                   = intel_vgpu_open,
        .release                = intel_vgpu_release,

        .read                   = intel_vgpu_read,
        .write                  = intel_vgpu_write,
        .mmap                   = intel_vgpu_mmap,
        .ioctl                  = intel_vgpu_ioctl,
};

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
        if (!intel_gvt_init_vgpu_type_groups(gvt))
                return -EFAULT;

        intel_gvt_ops = ops;

        return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
        intel_gvt_cleanup_vgpu_type_groups(gvt);
        mdev_unregister_device(dev);
}

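/*
 * Write protection piggybacks on KVM's page-track framework: the memslot
 * lookup is protected by kvm->srcu, while the tracking state itself is
 * serialized by kvm->mmu_lock.
 */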
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        struct kvm_memory_slot *slot;
        int idx;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        idx = srcu_read_lock(&kvm->srcu);
        slot = gfn_to_memslot(kvm, gfn);

        spin_lock(&kvm->mmu_lock);

        if (kvmgt_gfn_is_write_protected(info, gfn))
                goto out;

        kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
        kvmgt_protect_table_add(info, gfn);

out:
        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
        return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        struct kvm_memory_slot *slot;
        int idx;

        if (!handle_valid(handle))
                return 0;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        idx = srcu_read_lock(&kvm->srcu);
        slot = gfn_to_memslot(kvm, gfn);

        spin_lock(&kvm->mmu_lock);

        if (!kvmgt_gfn_is_write_protected(info, gfn))
                goto out;

        kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
        kvmgt_protect_table_del(info, gfn);

out:
        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
        return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                const u8 *val, int len,
                struct kvm_page_track_notifier_node *node)
{
        struct kvmgt_guest_info *info = container_of(node,
                                        struct kvmgt_guest_info, track_node);

        if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
                intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
                                        (void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
                struct kvm_memory_slot *slot,
                struct kvm_page_track_notifier_node *node)
{
        int i;
        gfn_t gfn;
        struct kvmgt_guest_info *info = container_of(node,
                                        struct kvmgt_guest_info, track_node);

        spin_lock(&kvm->mmu_lock);
        for (i = 0; i < slot->npages; i++) {
                gfn = slot->base_gfn + i;
                if (kvmgt_gfn_is_write_protected(info, gfn)) {
                        kvm_slot_page_track_remove_page(kvm, slot, gfn,
                                                KVM_PAGE_TRACK_WRITE);
                        kvmgt_protect_table_del(info, gfn);
                }
        }
        spin_unlock(&kvm->mmu_lock);
}

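/*
 * Issue the hypervisor-detection CPUID leaf (KVM_CPUID_SIGNATURE); a KVM
 * guest sees the "KVMKVMKVM" signature spread across EBX/ECX/EDX.
 */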
static bool kvmgt_check_guest(void)
{
        unsigned int eax, ebx, ecx, edx;
        char s[12];
        unsigned int *i;

        eax = KVM_CPUID_SIGNATURE;
        ebx = ecx = edx = 0;

        asm volatile ("cpuid"
                      : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                      :
                      : "cc", "memory");
        i = (unsigned int *)s;
        i[0] = ebx;
        i[1] = ecx;
        i[2] = edx;

        return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
}

/*
 * NOTE:
 * It's actually impossible to check whether we are running in a KVM
 * host, since the "KVM host" is simply the native machine. So we only
 * detect a KVM guest here.
 */
static int kvmgt_detect_host(void)
{
#ifdef CONFIG_INTEL_IOMMU
        if (intel_iommu_gfx_mapped) {
                gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
                return -ENODEV;
        }
#endif
        return kvmgt_check_guest() ? -ENODEV : 0;
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
        struct intel_vgpu *itr;
        struct kvmgt_guest_info *info;
        int id;
        bool ret = false;

        mutex_lock(&vgpu->gvt->lock);
        for_each_active_vgpu(vgpu->gvt, itr, id) {
                if (!handle_valid(itr->handle))
                        continue;

                info = (struct kvmgt_guest_info *)itr->handle;
                if (kvm && kvm == info->kvm) {
                        ret = true;
                        goto out;
                }
        }
out:
        mutex_unlock(&vgpu->gvt->lock);
        return ret;
}

static int kvmgt_guest_init(struct mdev_device *mdev)
{
        struct kvmgt_guest_info *info;
        struct intel_vgpu *vgpu;
        struct kvm *kvm;

        vgpu = mdev_get_drvdata(mdev);
        if (handle_valid(vgpu->handle))
                return -EEXIST;

        kvm = vgpu->vdev.kvm;
        if (!kvm || kvm->mm != current->mm) {
                gvt_err("KVM is required to use Intel vGPU\n");
                return -ESRCH;
        }

        if (__kvmgt_vgpu_exist(vgpu, kvm))
                return -EEXIST;

        info = vzalloc(sizeof(struct kvmgt_guest_info));
        if (!info)
                return -ENOMEM;

        vgpu->handle = (unsigned long)info;
        info->vgpu = vgpu;
        info->kvm = kvm;

        kvmgt_protect_table_init(info);
        gvt_cache_init(vgpu);

        info->track_node.track_write = kvmgt_page_track_write;
        info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
        kvm_page_track_register_notifier(kvm, &info->track_node);

        return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
        struct intel_vgpu *vgpu;

        if (!info) {
                gvt_err("kvmgt_guest_info invalid\n");
                return false;
        }

        vgpu = info->vgpu;

        kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
        kvmgt_protect_table_destroy(info);
        gvt_cache_destroy(vgpu);
        vfree(info);

        return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
        /* nothing to do here */
        return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
        /* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
        struct kvmgt_guest_info *info;
        struct intel_vgpu *vgpu;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        vgpu = info->vgpu;

        if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
                return 0;

        return -EFAULT;
}

static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
        unsigned long pfn;
        struct kvmgt_guest_info *info;
        struct device *dev;
        int rc;

        if (!handle_valid(handle))
                return INTEL_GVT_INVALID_ADDR;

        info = (struct kvmgt_guest_info *)handle;
        pfn = gvt_cache_find(info->vgpu, gfn);
        if (pfn != 0)
                return pfn;

        pfn = INTEL_GVT_INVALID_ADDR;
        dev = &info->vgpu->vdev.mdev->dev;
        rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
        if (rc != 1) {
                gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc);
                return INTEL_GVT_INVALID_ADDR;
        }

        gvt_cache_add(info->vgpu, gfn, pfn);
        return pfn;
}

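/*
 * kvm_read_guest()/kvm_write_guest() need a user address space. When
 * called from a kernel thread (current->mm == NULL), temporarily adopt
 * the guest process's mm with use_mm() for the duration of the access.
 */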
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len, bool write)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        int ret;
        bool kthread = current->mm == NULL;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        if (kthread)
                use_mm(kvm->mm);

        ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
                      kvm_read_guest(kvm, gpa, buf, len);

        if (kthread)
                unuse_mm(kvm->mm);

        return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len)
{
        return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len)
{
        return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
        return PFN_DOWN(__pa(addr));
}

struct intel_gvt_mpt kvmgt_mpt = {
        .detect_host = kvmgt_detect_host,
        .host_init = kvmgt_host_init,
        .host_exit = kvmgt_host_exit,
        .attach_vgpu = kvmgt_attach_vgpu,
        .detach_vgpu = kvmgt_detach_vgpu,
        .inject_msi = kvmgt_inject_msi,
        .from_virt_to_mfn = kvmgt_virt_to_pfn,
        .set_wp_page = kvmgt_write_protect_add,
        .unset_wp_page = kvmgt_write_protect_remove,
        .read_gpa = kvmgt_read_gpa,
        .write_gpa = kvmgt_write_gpa,
        .gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
        return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");