Merge remote-tracking branch 'airlied/drm-next' into drm-intel-next-queued
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 24b5b046754b37e8b8e3ab2c04a9f890b41eecf9..e29f9400c9d11d4b3fc4988ac3979935724b1b61 100644
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 #include "intel_mocs.h"
 #include <linux/dma-fence-array.h>
+#include <linux/kthread.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
+#include <linux/stop_machine.h>
 #include <linux/swap.h>
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
@@ -46,18 +49,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
-static bool cpu_cache_is_coherent(struct drm_device *dev,
-                                 enum i915_cache_level level)
-{
-       return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
-}
-
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
        if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
                return false;
 
-       if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+       if (!i915_gem_object_is_coherent(obj))
                return true;
 
        return obj->pin_display;
@@ -68,11 +65,10 @@ insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
 {
        memset(node, 0, sizeof(*node));
-       return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
-                                                  size, 0, -1,
-                                                  0, ggtt->mappable_end,
-                                                  DRM_MM_SEARCH_DEFAULT,
-                                                  DRM_MM_CREATE_DEFAULT);
+       return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
+                                          size, 0, I915_COLOR_UNEVICTABLE,
+                                          0, ggtt->mappable_end,
+                                          DRM_MM_INSERT_LOW);
 }
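
Aside on the replacement call above: it targets the reworked drm_mm API, where the old _generic variant's separate DRM_MM_SEARCH_*/DRM_MM_CREATE_* flags collapse into a single insertion mode and the node colour (I915_COLOR_UNEVICTABLE here) becomes an explicit argument. For mapping the arguments, this is the prototype as assumed at this point in the series (check include/drm/drm_mm.h for the authoritative version):

int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node,
                                u64 size, u64 alignment, unsigned long color,
                                u64 start, u64 end,
                                enum drm_mm_insert_mode mode);
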
 
 static void
@@ -254,7 +250,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
        if (needs_clflush &&
            (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-           !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+           !i915_gem_object_is_coherent(obj))
                drm_clflush_sg(pages);
 
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
@@ -312,6 +308,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
        .release = i915_gem_object_release_phys,
 };
 
+static const struct drm_i915_gem_object_ops i915_gem_object_ops;
+
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
        struct i915_vma *vma;
@@ -399,7 +397,7 @@ out:
        if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
                i915_gem_request_retire_upto(rq);
 
-       if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+       if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
                /* The GPU is now idle and this client has stalled.
                 * Since no other client has submitted a request in the
                 * meantime, assume that this client is the only one
@@ -424,7 +422,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
                                 long timeout,
                                 struct intel_rps_client *rps)
 {
+       unsigned int seq = __read_seqcount_begin(&resv->seq);
        struct dma_fence *excl;
+       bool prune_fences = false;
 
        if (flags & I915_WAIT_ALL) {
                struct dma_fence **shared;
@@ -440,7 +440,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
                        timeout = i915_gem_object_wait_fence(shared[i],
                                                             flags, timeout,
                                                             rps);
-                       if (timeout <= 0)
+                       if (timeout < 0)
                                break;
 
                        dma_fence_put(shared[i]);
@@ -449,15 +449,26 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
                for (; i < count; i++)
                        dma_fence_put(shared[i]);
                kfree(shared);
+
+               prune_fences = count && timeout >= 0;
        } else {
                excl = reservation_object_get_excl_rcu(resv);
        }
 
-       if (excl && timeout > 0)
+       if (excl && timeout >= 0) {
                timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
+               prune_fences = timeout >= 0;
+       }
 
        dma_fence_put(excl);
 
+       if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
+               reservation_object_lock(resv, NULL);
+               if (!__read_seqcount_retry(&resv->seq, seq))
+                       reservation_object_add_excl_fence(resv, NULL);
+               reservation_object_unlock(resv);
+       }
+
        return timeout;
 }
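
The seqcount bracket added around these waits decides whether the signaled fences can be pruned afterwards: if resv->seq moved while we slept, someone attached new fences and dropping them would be wrong. A minimal sketch of that pattern, pulled out into a hypothetical helper (not part of this patch), using only calls that appear in the hunk above:

static void prune_fences_if_unchanged(struct reservation_object *resv,
                                      unsigned int seq)
{
        /* Cheap unlocked check first: bail if resv changed under us. */
        if (__read_seqcount_retry(&resv->seq, seq))
                return;

        reservation_object_lock(resv, NULL);
        /* Re-check under the lock before replacing the exclusive fence. */
        if (!__read_seqcount_retry(&resv->seq, seq))
                reservation_object_add_excl_fence(resv, NULL);
        reservation_object_unlock(resv);
}
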
 
@@ -585,9 +596,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
        if (obj->mm.pages)
                return -EBUSY;
 
+       GEM_BUG_ON(obj->ops != &i915_gem_object_ops);
        obj->ops = &i915_gem_phys_ops;
 
-       return i915_gem_object_pin_pages(obj);
+       ret = i915_gem_object_pin_pages(obj);
+       if (ret)
+               goto err_xfer;
+
+       return 0;
+
+err_xfer:
+       obj->ops = &i915_gem_object_ops;
+       return ret;
 }
 
 static int
@@ -608,13 +628,12 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
        drm_clflush_virt_range(vaddr, args->size);
        i915_gem_chipset_flush(to_i915(obj->base.dev));
 
-       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+       intel_fb_obj_flush(obj, ORIGIN_CPU);
        return 0;
 }
 
-void *i915_gem_object_alloc(struct drm_device *dev)
+void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
        return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
 }
 
@@ -626,7 +645,7 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 
 static int
 i915_gem_create(struct drm_file *file,
-               struct drm_device *dev,
+               struct drm_i915_private *dev_priv,
                uint64_t size,
                uint32_t *handle_p)
 {
@@ -639,7 +658,7 @@ i915_gem_create(struct drm_file *file,
                return -EINVAL;
 
        /* Allocate the new object */
-       obj = i915_gem_object_create(dev, size);
+       obj = i915_gem_object_create(dev_priv, size);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
 
@@ -661,7 +680,7 @@ i915_gem_dumb_create(struct drm_file *file,
        /* have to work out size/pitch and return them */
        args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
        args->size = args->pitch * args->height;
-       return i915_gem_create(file, dev,
+       return i915_gem_create(file, to_i915(dev),
                               args->size, &args->handle);
 }
 
@@ -675,11 +694,12 @@ int
 i915_gem_create_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
 {
+       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_create *args = data;
 
-       i915_gem_flush_free_objects(to_i915(dev));
+       i915_gem_flush_free_objects(dev_priv);
 
-       return i915_gem_create(file, dev,
+       return i915_gem_create(file, dev_priv,
                               args->size, &args->handle);
 }
 
@@ -771,8 +791,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
         * anyway again before the next pread happens.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
-                                                       obj->cache_level);
+               *needs_clflush = !i915_gem_object_is_coherent(obj);
 
        if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
@@ -828,8 +847,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
         * before writing.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
-                                                        obj->cache_level);
+               *needs_clflush |= !i915_gem_object_is_coherent(obj);
 
        if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
@@ -1114,8 +1132,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                return -ENOENT;
 
        /* Bounds check source.  */
-       if (args->offset > obj->base.size ||
-           args->size > obj->base.size - args->offset) {
+       if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
                ret = -EINVAL;
                goto out;
        }
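
range_overflows_t() is defined outside this file (in the i915 utility headers at this point in the series, if memory serves), so for review purposes here is the check it stands in for, written as a sketch with a made-up name; it is exactly the condition being deleted above, evaluated without risking unsigned wraparound:

static inline bool gem_range_overflows(u64 offset, u64 size, u64 obj_size)
{
        /* True when [offset, offset + size) does not fit in the object. */
        return offset > obj_size || size > obj_size - offset;
}
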
@@ -1258,7 +1275,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
                user_data += page_length;
                offset += page_length;
        }
-       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+       intel_fb_obj_flush(obj, ORIGIN_CPU);
 
        mutex_lock(&i915->drm.struct_mutex);
 out_unpin:
@@ -1394,7 +1411,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
                offset = 0;
        }
 
-       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+       intel_fb_obj_flush(obj, ORIGIN_CPU);
        i915_gem_obj_finish_shmem_access(obj);
        return ret;
 }
@@ -1428,14 +1445,19 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                return -ENOENT;
 
        /* Bounds check destination. */
-       if (args->offset > obj->base.size ||
-           args->size > obj->base.size - args->offset) {
+       if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
                ret = -EINVAL;
                goto err;
        }
 
        trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
+       ret = -ENODEV;
+       if (obj->ops->pwrite)
+               ret = obj->ops->pwrite(obj, args);
+       if (ret != -ENODEV)
+               goto err;
+
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
@@ -1491,7 +1513,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 
        list_for_each_entry(vma, &obj->vma_list, obj_link) {
                if (!i915_vma_is_ggtt(vma))
-                       continue;
+                       break;
 
                if (i915_vma_is_active(vma))
                        continue;
@@ -1598,23 +1620,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 {
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;
-       int err = 0;
 
        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;
 
        /* Pinned buffers may be scanout, so flush the cache */
-       if (READ_ONCE(obj->pin_display)) {
-               err = i915_mutex_lock_interruptible(dev);
-               if (!err) {
-                       i915_gem_object_flush_cpu_write_domain(obj);
-                       mutex_unlock(&dev->struct_mutex);
-               }
-       }
-
+       i915_gem_object_flush_if_display(obj);
        i915_gem_object_put(obj);
-       return err;
+
+       return 0;
 }
 
 /**
@@ -1696,12 +1711,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 
 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
 {
-       u64 size;
-
-       size = i915_gem_object_get_stride(obj);
-       size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
-
-       return size >> PAGE_SHIFT;
+       return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
 }
 
 /**
@@ -1754,9 +1764,31 @@ int i915_gem_mmap_gtt_version(void)
        return 1;
 }
 
+static inline struct i915_ggtt_view
+compute_partial_view(struct drm_i915_gem_object *obj,
+                    pgoff_t page_offset,
+                    unsigned int chunk)
+{
+       struct i915_ggtt_view view;
+
+       if (i915_gem_object_is_tiled(obj))
+               chunk = roundup(chunk, tile_row_pages(obj));
+
+       view.type = I915_GGTT_VIEW_PARTIAL;
+       view.partial.offset = rounddown(page_offset, chunk);
+       view.partial.size =
+               min_t(unsigned int, chunk,
+                     (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+       /* If the partial covers the entire object, just create a normal VMA. */
+       if (chunk >= obj->base.size >> PAGE_SHIFT)
+               view.type = I915_GGTT_VIEW_NORMAL;
+
+       return view;
+}
+
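
A quick worked example of compute_partial_view() with hypothetical numbers:

/* Fault on page 0x1234 of an untiled 1 GiB object (0x40000 pages),
 * chunk = MIN_CHUNK_PAGES = 256:
 *   partial.offset = rounddown(0x1234, 256)     = 0x1200
 *   partial.size   = min(256, 0x40000 - 0x1200) = 256 pages
 * The partial VMA therefore maps pages [0x1200, 0x1300); since 256 is far
 * smaller than 0x40000, the view stays I915_GGTT_VIEW_PARTIAL instead of
 * degenerating into a normal VMA.
 */
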
 /**
  * i915_gem_fault - fault a page into the GTT
- * @area: CPU VMA in question
  * @vmf: fault info
  *
  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
@@ -1773,9 +1805,10 @@ int i915_gem_mmap_gtt_version(void)
  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
  */
-int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+int i915_gem_fault(struct vm_fault *vmf)
 {
 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
+       struct vm_area_struct *area = vmf->vma;
        struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
@@ -1830,26 +1863,9 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
        /* Now pin it into the GTT as needed */
        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
        if (IS_ERR(vma)) {
-               struct i915_ggtt_view view;
-               unsigned int chunk_size;
-
                /* Use a partial view if it is bigger than available space */
-               chunk_size = MIN_CHUNK_PAGES;
-               if (i915_gem_object_is_tiled(obj))
-                       chunk_size = roundup(chunk_size, tile_row_pages(obj));
-
-               memset(&view, 0, sizeof(view));
-               view.type = I915_GGTT_VIEW_PARTIAL;
-               view.params.partial.offset = rounddown(page_offset, chunk_size);
-               view.params.partial.size =
-                       min_t(unsigned int, chunk_size,
-                             vma_pages(area) - view.params.partial.offset);
-
-               /* If the partial covers the entire object, just create a
-                * normal VMA.
-                */
-               if (chunk_size >= obj->base.size >> PAGE_SHIFT)
-                       view.type = I915_GGTT_VIEW_NORMAL;
+               struct i915_ggtt_view view =
+                       compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
 
                /* Userspace is now writing through an untracked VMA, abandon
                 * all hope that the hardware is able to track future writes.
@@ -1878,7 +1894,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 
        /* Finally, remap it using the new GTT offset */
        ret = remap_io_mapping(area,
-                              area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
+                              area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
                               (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
                               min_t(u64, vma->size, area->vm_end - area->vm_start),
                               &ggtt->mappable);
@@ -2029,91 +2045,27 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
        }
 }
 
-/**
- * i915_gem_get_ggtt_size - return required global GTT size for an object
- * @dev_priv: i915 device
- * @size: object size
- * @tiling_mode: tiling mode
- *
- * Return the required global GTT size for an object, taking into account
- * potential fence register mapping.
- */
-u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
-                          u64 size, int tiling_mode)
-{
-       u64 ggtt_size;
-
-       GEM_BUG_ON(size == 0);
-
-       if (INTEL_GEN(dev_priv) >= 4 ||
-           tiling_mode == I915_TILING_NONE)
-               return size;
-
-       /* Previous chips need a power-of-two fence region when tiling */
-       if (IS_GEN3(dev_priv))
-               ggtt_size = 1024*1024;
-       else
-               ggtt_size = 512*1024;
-
-       while (ggtt_size < size)
-               ggtt_size <<= 1;
-
-       return ggtt_size;
-}
-
-/**
- * i915_gem_get_ggtt_alignment - return required global GTT alignment
- * @dev_priv: i915 device
- * @size: object size
- * @tiling_mode: tiling mode
- * @fenced: is fenced alignment required or not
- *
- * Return the required global GTT alignment for an object, taking into account
- * potential fence register mapping.
- */
-u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
-                               int tiling_mode, bool fenced)
-{
-       GEM_BUG_ON(size == 0);
-
-       /*
-        * Minimum alignment is 4k (GTT page size), but might be greater
-        * if a fence register is needed for the object.
-        */
-       if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
-           tiling_mode == I915_TILING_NONE)
-               return 4096;
-
-       /*
-        * Previous chips need to be aligned to the size of the smallest
-        * fence register that can contain the object.
-        */
-       return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
-}
-
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 {
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        int err;
 
        err = drm_gem_create_mmap_offset(&obj->base);
-       if (!err)
+       if (likely(!err))
                return 0;
 
-       /* We can idle the GPU locklessly to flush stale objects, but in order
-        * to claim that space for ourselves, we need to take the big
-        * struct_mutex to free the requests+objects and allocate our slot.
-        */
-       err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
-       if (err)
-               return err;
+       /* Attempt to reap some mmap space from dead objects */
+       do {
+               err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+               if (err)
+                       break;
 
-       err = i915_mutex_lock_interruptible(&dev_priv->drm);
-       if (!err) {
-               i915_gem_retire_requests(dev_priv);
+               i915_gem_drain_freed_objects(dev_priv);
                err = drm_gem_create_mmap_offset(&obj->base);
-               mutex_unlock(&dev_priv->drm.struct_mutex);
-       }
+               if (!err)
+                       break;
+
+       } while (flush_delayed_work(&dev_priv->gt.retire_work));
 
        return err;
 }
@@ -2184,6 +2136,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
         */
        shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
        obj->mm.madv = __I915_MADV_PURGED;
+       obj->mm.pages = ERR_PTR(-EFAULT);
 }
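
The new sentinel changes the meaning of obj->mm.pages: NULL still means "never populated", while an error pointer now means "purged, do not silently repopulate". A sketch of the resulting convention, with an illustrative helper name that does not exist in this patch:

static inline bool gem_object_has_pages_sketch(const struct drm_i915_gem_object *obj)
{
        /* NULL: never allocated; ERR_PTR(-EFAULT): purged; else resident. */
        return !IS_ERR_OR_NULL(obj->mm.pages);
}
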
 
 /* Try to discard unwanted pages */
@@ -2283,22 +2236,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 
        __i915_gem_object_reset_page_iter(obj);
 
-       obj->ops->put_pages(obj, pages);
+       if (!IS_ERR(pages))
+               obj->ops->put_pages(obj, pages);
+
 unlock:
        mutex_unlock(&obj->mm.lock);
 }
 
-static void i915_sg_trim(struct sg_table *orig_st)
+static bool i915_sg_trim(struct sg_table *orig_st)
 {
        struct sg_table new_st;
        struct scatterlist *sg, *new_sg;
        unsigned int i;
 
        if (orig_st->nents == orig_st->orig_nents)
-               return;
+               return false;
 
        if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
-               return;
+               return false;
 
        new_sg = new_st.sgl;
        for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
@@ -2306,10 +2261,12 @@ static void i915_sg_trim(struct sg_table *orig_st)
                /* called before being DMA mapped, no need to copy sg->dma_* */
                new_sg = sg_next(new_sg);
        }
+       GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
 
        sg_free_table(orig_st);
 
        *orig_st = new_st;
+       return true;
 }
 
 static struct sg_table *
@@ -2501,7 +2458,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
        if (err)
                return err;
 
-       if (unlikely(!obj->mm.pages)) {
+       if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
                err = ____i915_gem_object_get_pages(obj);
                if (err)
                        goto unlock;
@@ -2579,7 +2536,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 
        pinned = true;
        if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-               if (unlikely(!obj->mm.pages)) {
+               if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
                        ret = ____i915_gem_object_get_pages(obj);
                        if (ret)
                                goto err_unlock;
@@ -2627,41 +2584,110 @@ err_unlock:
        goto out_unlock;
 }
 
-static bool i915_context_is_banned(const struct i915_gem_context *ctx)
+static int
+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
+                          const struct drm_i915_gem_pwrite *arg)
 {
-       unsigned long elapsed;
+       struct address_space *mapping = obj->base.filp->f_mapping;
+       char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+       u64 remain, offset;
+       unsigned int pg;
+
+       /* Before we instantiate/pin the backing store for our use, we
+        * can prepopulate the shmemfs filp efficiently using a write into
+        * the pagecache. We avoid the penalty of instantiating all the
+        * pages, important if the user is just writing to a few and never
+        * uses the object on the GPU, and using a direct write into shmemfs
+        * allows it to avoid the cost of retrieving a page (either swapin
+        * or clearing-before-use) before it is overwritten.
+        */
+       if (READ_ONCE(obj->mm.pages))
+               return -ENODEV;
 
-       if (ctx->hang_stats.banned)
-               return true;
+       /* Before the pages are instantiated the object is treated as being
+        * in the CPU domain. The pages will be clflushed as required before
+        * use, and we can freely write into the pages directly. If userspace
+        * races pwrite with any other operation, corruption will ensue -
+        * that is userspace's prerogative!
+        */
 
-       elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
-       if (ctx->hang_stats.ban_period_seconds &&
-           elapsed <= ctx->hang_stats.ban_period_seconds) {
-               DRM_DEBUG("context hanging too fast, banning!\n");
-               return true;
-       }
+       remain = arg->size;
+       offset = arg->offset;
+       pg = offset_in_page(offset);
 
-       return false;
+       do {
+               unsigned int len, unwritten;
+               struct page *page;
+               void *data, *vaddr;
+               int err;
+
+               len = PAGE_SIZE - pg;
+               if (len > remain)
+                       len = remain;
+
+               err = pagecache_write_begin(obj->base.filp, mapping,
+                                           offset, len, 0,
+                                           &page, &data);
+               if (err < 0)
+                       return err;
+
+               vaddr = kmap(page);
+               unwritten = copy_from_user(vaddr + pg, user_data, len);
+               kunmap(page);
+
+               err = pagecache_write_end(obj->base.filp, mapping,
+                                         offset, len, len - unwritten,
+                                         page, data);
+               if (err < 0)
+                       return err;
+
+               if (unwritten)
+                       return -EFAULT;
+
+               remain -= len;
+               user_data += len;
+               offset += len;
+               pg = 0;
+       } while (remain);
+
+       return 0;
 }
 
-static void i915_set_reset_status(struct i915_gem_context *ctx,
-                                 const bool guilty)
+static bool ban_context(const struct i915_gem_context *ctx)
 {
-       struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
+       return (i915_gem_context_is_bannable(ctx) &&
+               ctx->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD);
+}
 
-       if (guilty) {
-               hs->banned = i915_context_is_banned(ctx);
-               hs->batch_active++;
-               hs->guilty_ts = get_seconds();
-       } else {
-               hs->batch_pending++;
-       }
+static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
+{
+       ctx->guilty_count++;
+       ctx->ban_score += CONTEXT_SCORE_GUILTY;
+       if (ban_context(ctx))
+               i915_gem_context_set_banned(ctx);
+
+       DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
+                        ctx->name, ctx->ban_score,
+                        yesno(i915_gem_context_is_banned(ctx)));
+
+       if (!i915_gem_context_is_banned(ctx) || IS_ERR_OR_NULL(ctx->file_priv))
+               return;
+
+       ctx->file_priv->context_bans++;
+       DRM_DEBUG_DRIVER("client %s has had %d contexts banned\n",
+                        ctx->name, ctx->file_priv->context_bans);
+}
+
+static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
+{
+       ctx->active_count++;
 }
 
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
-       struct drm_i915_gem_request *request;
+       struct drm_i915_gem_request *request, *active = NULL;
+       unsigned long flags;
 
        /* We are called by the error capture and reset at a random
         * point in time. In particular, note that neither is crucially
@@ -2671,17 +2697,86 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
         * extra delay for a recent interrupt is pointless. Hence, we do
         * not need an engine->irq_seqno_barrier() before the seqno reads.
         */
+       spin_lock_irqsave(&engine->timeline->lock, flags);
        list_for_each_entry(request, &engine->timeline->requests, link) {
-               if (__i915_gem_request_completed(request))
+               if (__i915_gem_request_completed(request,
+                                                request->global_seqno))
                        continue;
 
-               return request;
+               GEM_BUG_ON(request->engine != engine);
+               GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                   &request->fence.flags));
+
+               active = request;
+               break;
+       }
+       spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+       return active;
+}
+
+static bool engine_stalled(struct intel_engine_cs *engine)
+{
+       if (!engine->hangcheck.stalled)
+               return false;
+
+       /* Check for possible seqno movement after hang declaration */
+       if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) {
+               DRM_DEBUG_DRIVER("%s pardoned\n", engine->name);
+               return false;
        }
 
-       return NULL;
+       return true;
 }
 
-static void reset_request(struct drm_i915_gem_request *request)
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       int err = 0;
+
+       /* Ensure irq handler finishes, and not run again. */
+       for_each_engine(engine, dev_priv, id) {
+               struct drm_i915_gem_request *request;
+
+               /* Prevent the signaler thread from updating the request
+                * state (by calling dma_fence_signal) as we are processing
+                * the reset. The write from the GPU of the seqno is
+                * asynchronous and the signaler thread may see a different
+                * value to us and declare the request complete, even though
+                * the reset routine have picked that request as the active
+                * the reset routine has picked that request as the active
+                * gracefully!
+                */
+               kthread_park(engine->breadcrumbs.signaler);
+
+               /* Prevent request submission to the hardware until we have
+                * completed the reset in i915_gem_reset_finish(). If a request
+                * is completed by one engine, it may then queue a request
+                * to a second via its engine->irq_tasklet *just* as we are
+                * calling engine->init_hw() and also writing the ELSP.
+                * Turning off the engine->irq_tasklet until the reset is over
+                * prevents the race.
+                */
+               tasklet_kill(&engine->irq_tasklet);
+               tasklet_disable(&engine->irq_tasklet);
+
+               if (engine->irq_seqno_barrier)
+                       engine->irq_seqno_barrier(engine);
+
+               if (engine_stalled(engine)) {
+                       request = i915_gem_find_active_request(engine);
+                       if (request && request->fence.error == -EIO)
+                               err = -EIO; /* Previous reset failed! */
+               }
+       }
+
+       i915_gem_revoke_fences(dev_priv);
+
+       return err;
+}
+
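
i915_gem_reset_prepare() is one half of a pair with i915_gem_reset_finish() further down, which re-enables the tasklets and unparks the signalers. A rough sketch of the order the reset path presumably calls these in (simplified; the caller and error handling are assumptions, not shown in this hunk):

static void reset_flow_sketch(struct drm_i915_private *i915)
{
        int err;

        /* Quiesce: park signaler threads, disable submission tasklets. */
        err = i915_gem_reset_prepare(i915);
        /* -EIO here means a previous reset already failed; real code bails. */
        (void)err;

        /* ... the actual hardware reset happens around here ... */

        /* Mark guilty/innocent requests and set engines up to resume. */
        i915_gem_reset(i915);

        /* Unpark the signalers and re-enable the tasklets. */
        i915_gem_reset_finish(i915);
}
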
+static void skip_request(struct drm_i915_gem_request *request)
 {
        void *vaddr = request->ring->vaddr;
        u32 head;
@@ -2696,63 +2791,87 @@ static void reset_request(struct drm_i915_gem_request *request)
                head = 0;
        }
        memset(vaddr + head, 0, request->postfix - head);
+
+       dma_fence_set_error(&request->fence, -EIO);
 }
 
-static void i915_gem_reset_engine(struct intel_engine_cs *engine)
+static void engine_skip_context(struct drm_i915_gem_request *request)
 {
-       struct drm_i915_gem_request *request;
-       struct i915_gem_context *incomplete_ctx;
+       struct intel_engine_cs *engine = request->engine;
+       struct i915_gem_context *hung_ctx = request->ctx;
        struct intel_timeline *timeline;
        unsigned long flags;
-       bool ring_hung;
 
-       if (engine->irq_seqno_barrier)
-               engine->irq_seqno_barrier(engine);
+       timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
 
-       request = i915_gem_find_active_request(engine);
-       if (!request)
-               return;
+       spin_lock_irqsave(&engine->timeline->lock, flags);
+       spin_lock(&timeline->lock);
 
-       ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
-       if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
-               ring_hung = false;
+       list_for_each_entry_continue(request, &engine->timeline->requests, link)
+               if (request->ctx == hung_ctx)
+                       skip_request(request);
 
-       i915_set_reset_status(request->ctx, ring_hung);
-       if (!ring_hung)
-               return;
+       list_for_each_entry(request, &timeline->requests, link)
+               skip_request(request);
 
-       DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-                        engine->name, request->global_seqno);
+       spin_unlock(&timeline->lock);
+       spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
 
-       /* Setup the CS to resume from the breadcrumb of the hung request */
-       engine->reset_hw(engine, request);
+/* Returns true if the request was guilty of hang */
+static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
+{
+       /* Read once and return the resolution */
+       const bool guilty = engine_stalled(request->engine);
 
-       /* Users of the default context do not rely on logical state
-        * preserved between batches. They have to emit full state on
-        * every batch and so it is safe to execute queued requests following
-        * the hang.
+       /* The guilty request will get skipped on a hung engine.
+        *
+        * Users of client default contexts do not rely on logical
+        * state preserved between batches so it is safe to execute
+        * queued requests following the hang. Non default contexts
+        * rely on preserved state, so skipping a batch loses the
+        * evolution of the state and it needs to be considered corrupted.
+        * Executing more queued batches on top of corrupted state is
+        * risky. But we take the risk by trying to advance through
+        * the queued requests in order to make the client behaviour
+        * more predictable around resets, by not throwing away random
+        * amounts of batches it has prepared for execution. Sophisticated
+        * clients can use gem_reset_stats_ioctl and dma fence status
+        * (exported via sync_file info ioctl on explicit fences) to observe
+        * when they lose the context state and should rebuild accordingly.
         *
-        * Other contexts preserve state, now corrupt. We want to skip all
-        * queued requests that reference the corrupt context.
+        * The context ban, and ultimately the client ban, mechanisms are safety
+        * valves if client submission ends up resulting in nothing more than
+        * subsequent hangs.
         */
-       incomplete_ctx = request->ctx;
-       if (i915_gem_context_is_default(incomplete_ctx))
-               return;
 
-       timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+       if (guilty) {
+               i915_gem_context_mark_guilty(request->ctx);
+               skip_request(request);
+       } else {
+               i915_gem_context_mark_innocent(request->ctx);
+               dma_fence_set_error(&request->fence, -EAGAIN);
+       }
 
-       spin_lock_irqsave(&engine->timeline->lock, flags);
-       spin_lock(&timeline->lock);
+       return guilty;
+}
 
-       list_for_each_entry_continue(request, &engine->timeline->requests, link)
-               if (request->ctx == incomplete_ctx)
-                       reset_request(request);
+static void i915_gem_reset_engine(struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_request *request;
 
-       list_for_each_entry(request, &timeline->requests, link)
-               reset_request(request);
+       request = i915_gem_find_active_request(engine);
+       if (request && i915_gem_reset_request(request)) {
+               DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
+                                engine->name, request->global_seqno);
 
-       spin_unlock(&timeline->lock);
-       spin_unlock_irqrestore(&engine->timeline->lock, flags);
+               /* If this context is now banned, skip all pending requests. */
+               if (i915_gem_context_is_banned(request->ctx))
+                       engine_skip_context(request);
+       }
+
+       /* Setup the CS to resume from the breadcrumb of the hung request */
+       engine->reset_hw(engine, request);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
@@ -2764,8 +2883,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 
        i915_gem_retire_requests(dev_priv);
 
-       for_each_engine(engine, dev_priv, id)
+       for_each_engine(engine, dev_priv, id) {
+               struct i915_gem_context *ctx;
+
                i915_gem_reset_engine(engine);
+               ctx = fetch_and_zero(&engine->last_retired_context);
+               if (ctx)
+                       engine->context_unpin(engine, ctx);
+       }
 
        i915_gem_restore_fences(dev_priv);
 
@@ -2777,16 +2902,45 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
        }
 }
 
+void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+       for_each_engine(engine, dev_priv, id) {
+               tasklet_enable(&engine->irq_tasklet);
+               kthread_unpark(engine->breadcrumbs.signaler);
+       }
+}
+
 static void nop_submit_request(struct drm_i915_gem_request *request)
 {
+       dma_fence_set_error(&request->fence, -EIO);
        i915_gem_request_submit(request);
        intel_engine_init_global_seqno(request->engine, request->global_seqno);
 }
 
-static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
+static void engine_set_wedged(struct intel_engine_cs *engine)
 {
+       struct drm_i915_gem_request *request;
+       unsigned long flags;
+
+       /* We need to be sure that no thread is running the old callback as
+        * we install the nop handler (otherwise we would submit a request
+        * to hardware that will never complete). In order to prevent this
+        * race, we wait until the machine is idle before making the swap
+        * (using stop_machine()).
+        */
        engine->submit_request = nop_submit_request;
 
+       /* Mark all executing requests as skipped */
+       spin_lock_irqsave(&engine->timeline->lock, flags);
+       list_for_each_entry(request, &engine->timeline->requests, link)
+               dma_fence_set_error(&request->fence, -EIO);
+       spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
        /* Mark all pending requests as complete so that any concurrent
         * (lockless) lookup doesn't try and wait upon the request as we
         * reset it.
@@ -2815,20 +2969,29 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
        }
 }
 
-void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+static int __i915_gem_set_wedged_BKL(void *data)
 {
+       struct drm_i915_private *i915 = data;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
+       for_each_engine(engine, i915, id)
+               engine_set_wedged(engine);
+
+       return 0;
+}
+
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+{
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
        set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
-       i915_gem_context_lost(dev_priv);
-       for_each_engine(engine, dev_priv, id)
-               i915_gem_cleanup_engine(engine);
-       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
+       stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
 
+       i915_gem_context_lost(dev_priv);
        i915_gem_retire_requests(dev_priv);
+
+       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
 
 static void
@@ -2874,8 +3037,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
         * new request is submitted.
         */
        wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
-                intel_execlists_idle(dev_priv), 10);
-
+                intel_engines_are_idle(dev_priv),
+                10);
        if (READ_ONCE(dev_priv->gt.active_requests))
                return;
 
@@ -2900,11 +3063,13 @@ i915_gem_idle_work_handler(struct work_struct *work)
        if (dev_priv->gt.active_requests)
                goto out_unlock;
 
-       if (wait_for(intel_execlists_idle(dev_priv), 10))
+       if (wait_for(intel_engines_are_idle(dev_priv), 10))
                DRM_ERROR("Timeout waiting for engines to idle\n");
 
-       for_each_engine(engine, dev_priv, id)
+       for_each_engine(engine, dev_priv, id) {
+               intel_engine_disarm_breadcrumbs(engine);
                i915_gem_batch_pool_fini(&engine->batch_pool);
+       }
 
        GEM_BUG_ON(!dev_priv->gt.awake);
        dev_priv->gt.awake = false;
@@ -3003,6 +3168,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
                if (args->timeout_ns < 0)
                        args->timeout_ns = 0;
+
+               /*
+                * Apparently ktime isn't accurate enough and occasionally has a
+                * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+                * things up to make the test happy. We allow up to 1 jiffy.
+                *
+                * This is a regression from the timespec->ktime conversion.
+                */
+               if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+                       args->timeout_ns = 0;
        }
 
        i915_gem_object_put(obj);
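
To make the 1-jiffy allowance above concrete, a hypothetical example:

        /* e.g. with HZ == 1000, nsecs_to_jiffies(600000) == 0, so a wait
         * that returned -ETIME with 0.6 ms "remaining" is reported to
         * userspace as fully elapsed (timeout_ns = 0) instead of tripping
         * strict tests that expect the whole timeout to be consumed.
         */
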
@@ -3045,41 +3220,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
        return 0;
 }
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-                            bool force)
-{
-       /* If we don't have a page list set up, then we're not pinned
-        * to GPU, and we can ignore the cache flush because it'll happen
-        * again at bind time.
-        */
-       if (!obj->mm.pages)
-               return;
-
-       /*
-        * Stolen memory is always coherent with the GPU as it is explicitly
-        * marked as wc by the system, or the system is cache-coherent.
-        */
-       if (obj->stolen || obj->phys_handle)
-               return;
-
-       /* If the GPU is snooping the contents of the CPU cache,
-        * we do not need to manually clear the CPU cache lines.  However,
-        * the caches are only snooped when the render cache is
-        * flushed/invalidated.  As we always have to emit invalidations
-        * and flushes when moving into and out of the RENDER domain, correct
-        * snooping behaviour occurs naturally as the result of our domain
-        * tracking.
-        */
-       if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
-               obj->cache_dirty = true;
-               return;
-       }
-
-       trace_i915_gem_object_clflush(obj);
-       drm_clflush_sg(obj->mm.pages);
-       obj->cache_dirty = false;
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -3108,12 +3248,9 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
        if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
                POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
 
-       intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
+       intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
 
        obj->base.write_domain = 0;
-       trace_i915_gem_object_change_domain(obj,
-                                           obj->base.read_domains,
-                                           I915_GEM_DOMAIN_GTT);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
@@ -3123,13 +3260,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                return;
 
-       i915_gem_clflush_object(obj, obj->pin_display);
-       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+       i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+       obj->base.write_domain = 0;
+}
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
+               return;
 
+       i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->base.write_domain = 0;
-       trace_i915_gem_object_change_domain(obj,
-                                           obj->base.read_domains,
-                                           I915_GEM_DOMAIN_CPU);
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+       if (!READ_ONCE(obj->pin_display))
+               return;
+
+       mutex_lock(&obj->base.dev->struct_mutex);
+       __i915_gem_object_flush_for_display(obj);
+       mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
 /**
@@ -3143,7 +3294,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
-       uint32_t old_write_domain, old_read_domains;
        int ret;
 
        lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3181,9 +3331,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();
 
-       old_write_domain = obj->base.write_domain;
-       old_read_domains = obj->base.read_domains;
-
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
@@ -3195,10 +3342,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
                obj->mm.dirty = true;
        }
 
-       trace_i915_gem_object_change_domain(obj,
-                                           old_read_domains,
-                                           old_write_domain);
-
        i915_gem_object_unpin_pages(obj);
        return 0;
 }
@@ -3323,7 +3466,7 @@ restart:
        }
 
        if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
-           cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+           i915_gem_object_is_coherent(obj))
                obj->cache_dirty = true;
 
        list_for_each_entry(vma, &obj->vma_list, obj_link)
@@ -3373,7 +3516,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
-       int ret;
+       int ret = 0;
 
        switch (args->caching) {
        case I915_CACHING_NONE:
@@ -3398,20 +3541,29 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                return -EINVAL;
        }
 
-       ret = i915_mutex_lock_interruptible(dev);
+       obj = i915_gem_object_lookup(file, args->handle);
+       if (!obj)
+               return -ENOENT;
+
+       if (obj->cache_level == level)
+               goto out;
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE,
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  to_rps_client(file));
        if (ret)
-               return ret;
+               goto out;
 
-       obj = i915_gem_object_lookup(file, args->handle);
-       if (!obj) {
-               ret = -ENOENT;
-               goto unlock;
-       }
+       ret = i915_mutex_lock_interruptible(dev);
+       if (ret)
+               goto out;
 
        ret = i915_gem_object_set_cache_level(obj, level);
-       i915_gem_object_put(obj);
-unlock:
        mutex_unlock(&dev->struct_mutex);
+
+out:
+       i915_gem_object_put(obj);
        return ret;
 }
 
@@ -3426,7 +3578,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     const struct i915_ggtt_view *view)
 {
        struct i915_vma *vma;
-       u32 old_read_domains, old_write_domain;
        int ret;
 
        lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3461,7 +3612,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
-       if (view->type == I915_GGTT_VIEW_NORMAL)
+       if (!view || view->type == I915_GGTT_VIEW_NORMAL)
                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
                                               PIN_MAPPABLE | PIN_NONBLOCK);
        if (IS_ERR(vma)) {
@@ -3486,24 +3637,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
        /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-       if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
-               i915_gem_clflush_object(obj, true);
-               intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
-       }
-
-       old_write_domain = obj->base.write_domain;
-       old_read_domains = obj->base.read_domains;
+       __i915_gem_object_flush_for_display(obj);
+       intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
-       obj->base.write_domain = 0;
        obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
-       trace_i915_gem_object_change_domain(obj,
-                                           old_read_domains,
-                                           old_write_domain);
-
        return vma;
 
 err_unpin_display:
@@ -3514,17 +3655,16 @@ err_unpin_display:
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
-       lockdep_assert_held(&vma->vm->dev->struct_mutex);
+       lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
        if (WARN_ON(vma->obj->pin_display == 0))
                return;
 
        if (--vma->obj->pin_display == 0)
-               vma->display_alignment = 0;
+               vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
        /* Bump the LRU to try and avoid premature eviction whilst flipping  */
-       if (!i915_vma_is_active(vma))
-               list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+       i915_gem_object_bump_inactive_ggtt(vma->obj);
 
        i915_vma_unpin(vma);
 }
@@ -3540,7 +3680,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 int
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
-       uint32_t old_write_domain, old_read_domains;
        int ret;
 
        lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3559,13 +3698,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
        i915_gem_object_flush_gtt_write_domain(obj);
 
-       old_write_domain = obj->base.write_domain;
-       old_read_domains = obj->base.read_domains;
-
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-               i915_gem_clflush_object(obj, false);
-
+               i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
        }
 
@@ -3582,10 +3717,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
                obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }
 
-       trace_i915_gem_object_change_domain(obj,
-                                           old_read_domains,
-                                           old_write_domain);
-
        return 0;
 }
 
@@ -3613,16 +3744,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
                return -EIO;
 
        spin_lock(&file_priv->mm.lock);
-       list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
+       list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
                if (time_after_eq(request->emitted_jiffies, recent_enough))
                        break;
 
-               /*
-                * Note that the request might not have been submitted yet.
-                * In which case emitted_jiffies will be zero.
-                */
-               if (!request->emitted_jiffies)
-                       continue;
+               if (target) {
+                       list_del(&target->client_link);
+                       target->file_priv = NULL;
+               }
 
                target = request;
        }
@@ -3655,8 +3784,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
        lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-       vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
-       if (IS_ERR(vma))
+       vma = i915_vma_instance(obj, vm, view);
+       if (unlikely(IS_ERR(vma)))
                return vma;
 
        if (i915_vma_misplaced(vma, size, alignment, flags)) {
@@ -3665,10 +3794,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                        return ERR_PTR(-ENOSPC);
 
                if (flags & PIN_MAPPABLE) {
-                       u32 fence_size;
-
-                       fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
-                                                           i915_gem_object_get_tiling(obj));
                        /* If the required space is larger than the available
                         * aperture, we will not able to find a slot for the
                         * object and unbinding the object now will be in
@@ -3676,7 +3801,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                         * the object in and out of the Global GTT and
                         * waste a lot of cycles under the mutex.
                         */
-                       if (fence_size > dev_priv->ggtt.mappable_end)
+                       if (vma->fence_size > dev_priv->ggtt.mappable_end)
                                return ERR_PTR(-E2BIG);
 
                        /* If NONBLOCK is set the caller is optimistically
@@ -3695,7 +3820,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                         * we could try to minimise harm to others.
                         */
                        if (flags & PIN_NONBLOCK &&
-                           fence_size > dev_priv->ggtt.mappable_end / 2)
+                           vma->fence_size > dev_priv->ggtt.mappable_end / 2)
                                return ERR_PTR(-ENOSPC);
                }
 
@@ -3912,7 +4037,7 @@ frontbuffer_retire(struct i915_gem_active *active,
        struct drm_i915_gem_object *obj =
                container_of(active, typeof(*obj), frontbuffer_write);
 
-       intel_fb_obj_flush(obj, true, ORIGIN_CS);
+       intel_fb_obj_flush(obj, ORIGIN_CS);
 }
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
@@ -3944,18 +4069,16 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE,
+
        .get_pages = i915_gem_object_get_pages_gtt,
        .put_pages = i915_gem_object_put_pages_gtt,
-};
 
-/* Note we don't consider signbits :| */
-#define overflows_type(x, T) \
-       (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
+       .pwrite = i915_gem_object_pwrite_gtt,
+};
 
 struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_device *dev, u64 size)
+i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_object *obj;
        struct address_space *mapping;
        gfp_t mask;
@@ -3972,16 +4095,16 @@ i915_gem_object_create(struct drm_device *dev, u64 size)
        if (overflows_type(size, obj->base.size))
                return ERR_PTR(-E2BIG);
 
-       obj = i915_gem_object_alloc(dev);
+       obj = i915_gem_object_alloc(dev_priv);
        if (obj == NULL)
                return ERR_PTR(-ENOMEM);
 
-       ret = drm_gem_object_init(dev, &obj->base, size);
+       ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size);
        if (ret)
                goto fail;
 
        mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
-       if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) {
+       if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
                /* 965gm cannot relocate objects above 4GiB. */
                mask &= ~__GFP_HIGHMEM;
                mask |= __GFP_DMA32;
@@ -4174,14 +4297,33 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
        enum intel_engine_id id;
 
        for_each_engine(engine, dev_priv, id)
-               GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
+               GEM_BUG_ON(engine->last_retired_context &&
+                          !i915_gem_context_is_kernel(engine->last_retired_context));
 }
 
-int i915_gem_suspend(struct drm_device *dev)
+void i915_gem_sanitize(struct drm_i915_private *i915)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
+       /*
+        * If we inherit context state from the BIOS or earlier occupants
+        * of the GPU, the GPU may be in an inconsistent state when we
+        * try to take over. The only way to remove the earlier state
+        * is by resetting. However, resetting on older gens is tricky as
+        * it may impact the display and we are uncertain about the stability
+        * of the reset, so we only reset recent machines with logical
+        * context support (which must be reset anyway to remove stray contexts).
+        */
+       if (HAS_HW_CONTEXTS(i915)) {
+               int reset = intel_gpu_reset(i915, ALL_ENGINES);
+               WARN_ON(reset && reset != -ENODEV);
+       }
+}
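
About the WARN_ON() in i915_gem_sanitize() above: the reset is best-effort, and the one error tolerated is -ENODEV, which I take to be what intel_gpu_reset() returns when the platform has no reset method at all. A tiny self-contained sketch of that idiom (struct example_dev and reset_hw() are made up for illustration):

#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/types.h>

struct example_dev {
	bool has_reset;
};

/* stand-in for a platform reset hook */
static int reset_hw(struct example_dev *dev)
{
	return dev->has_reset ? 0 : -ENODEV;
}

/* best-effort sanitize: "reset not supported" is fine, a failed reset is not */
static void sanitize_hw(struct example_dev *dev)
{
	int err = reset_hw(dev);

	WARN_ON(err && err != -ENODEV);
}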
+
+int i915_gem_suspend(struct drm_i915_private *dev_priv)
+{
+       struct drm_device *dev = &dev_priv->drm;
        int ret;
 
+       intel_runtime_pm_get(dev_priv);
        intel_suspend_gt_powersave(dev_priv);
 
        mutex_lock(&dev->struct_mutex);
@@ -4196,13 +4338,13 @@ int i915_gem_suspend(struct drm_device *dev)
         */
        ret = i915_gem_switch_to_kernel_context(dev_priv);
        if (ret)
-               goto err;
+               goto err_unlock;
 
        ret = i915_gem_wait_for_idle(dev_priv,
                                     I915_WAIT_INTERRUPTIBLE |
                                     I915_WAIT_LOCKED);
        if (ret)
-               goto err;
+               goto err_unlock;
 
        i915_gem_retire_requests(dev_priv);
        GEM_BUG_ON(dev_priv->gt.active_requests);
@@ -4213,14 +4355,20 @@ int i915_gem_suspend(struct drm_device *dev)
 
        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
        cancel_delayed_work_sync(&dev_priv->gt.retire_work);
-       flush_delayed_work(&dev_priv->gt.idle_work);
-       flush_work(&dev_priv->mm.free_work);
+
+       /* As the idle_work will rearm itself if it detects a race, play safe
+        * and repeat the flush until it is definitely idle.
+        */
+       while (flush_delayed_work(&dev_priv->gt.idle_work))
+               ;
+
+       i915_gem_drain_freed_objects(dev_priv);
 
        /* Assert that we successfully flushed all the work and
         * reset the GPU back to its idle, low power state.
         */
        WARN_ON(dev_priv->gt.awake);
-       WARN_ON(!intel_execlists_idle(dev_priv));
+       WARN_ON(!intel_engines_are_idle(dev_priv));
 
        /*
         * Neither the BIOS, ourselves or any other kernel
@@ -4241,21 +4389,19 @@ int i915_gem_suspend(struct drm_device *dev)
         * machines is a good idea, we don't - just in case it leaves the
         * machine in an unusable condition.
         */
-       if (HAS_HW_CONTEXTS(dev_priv)) {
-               int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
-               WARN_ON(reset && reset != -ENODEV);
-       }
+       i915_gem_sanitize(dev_priv);
+       goto out_rpm_put;
 
-       return 0;
-
-err:
+err_unlock:
        mutex_unlock(&dev->struct_mutex);
+out_rpm_put:
+       intel_runtime_pm_put(dev_priv);
        return ret;
 }
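
The flush loop added to i915_gem_suspend() above is needed because a delayed work item that re-queues itself can rearm in the window right after a single flush. flush_delayed_work() returns false once the work was already idle, so looping until it does is a simple way to drain such work. A self-contained sketch of that pattern (all names below are made up):

#include <linux/jiffies.h>
#include <linux/workqueue.h>

static bool keep_running = true;	/* made-up stop condition */
static void rearming_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(rearming_work, rearming_work_fn);

static void rearming_work_fn(struct work_struct *work)
{
	/* ... background housekeeping ... */

	if (keep_running)
		schedule_delayed_work(&rearming_work, HZ);	/* rearm */
}

static void drain_rearming_work(void)
{
	keep_running = false;	/* stop further rearms at the source */

	/* Each flush may race with one final rearm, so repeat until
	 * flush_delayed_work() reports the work was already idle.
	 */
	while (flush_delayed_work(&rearming_work))
		;
}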
 
-void i915_gem_resume(struct drm_device *dev)
+void i915_gem_resume(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct drm_device *dev = &dev_priv->drm;
 
        WARN_ON(dev_priv->gt.awake);
 
@@ -4319,12 +4465,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)
        }
 }
 
-int
-i915_gem_init_hw(struct drm_device *dev)
+static int __i915_gem_restart_engines(void *data)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct drm_i915_private *i915 = data;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
+       int err;
+
+       for_each_engine(engine, i915, id) {
+               err = engine->init_hw(engine);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
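
A note on the shape of __i915_gem_restart_engines() above: it takes void *data and returns int, which happens to match the callback type expected by stop_machine(). Whether that is the intent is my assumption, not something stated in this hunk; the sketch below only shows that both call styles would work:

#include <linux/stop_machine.h>

/* illustrative wrapper: run the engine restart directly, or (hypothetically)
 * with every other CPU quiesced via stop_machine()
 */
static int restart_engines(struct drm_i915_private *i915, bool atomic)
{
	if (!atomic)
		return __i915_gem_restart_engines(i915);

	/* stop_machine() wants int (*fn)(void *), which the helper matches */
	return stop_machine(__i915_gem_restart_engines, i915, NULL);
}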
+
+int i915_gem_init_hw(struct drm_i915_private *dev_priv)
+{
        int ret;
 
        dev_priv->gt.last_init_time = ktime_get();
@@ -4370,16 +4528,14 @@ i915_gem_init_hw(struct drm_device *dev)
        }
 
        /* Need to do basic initialisation of all rings first: */
-       for_each_engine(engine, dev_priv, id) {
-               ret = engine->init_hw(engine);
-               if (ret)
-                       goto out;
-       }
+       ret = __i915_gem_restart_engines(dev_priv);
+       if (ret)
+               goto out;
 
-       intel_mocs_init_l3cc_table(dev);
+       intel_mocs_init_l3cc_table(dev_priv);
 
        /* We can't enable contexts until all firmware is loaded */
-       ret = intel_guc_setup(dev);
+       ret = intel_guc_setup(dev_priv);
        if (ret)
                goto out;
 
@@ -4409,12 +4565,13 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
        return true;
 }
 
-int i915_gem_init(struct drm_device *dev)
+int i915_gem_init(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
        int ret;
 
-       mutex_lock(&dev->struct_mutex);
+       mutex_lock(&dev_priv->drm.struct_mutex);
+
+       i915_gem_clflush_init(dev_priv);
 
        if (!i915.enable_execlists) {
                dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4438,15 +4595,15 @@ int i915_gem_init(struct drm_device *dev)
        if (ret)
                goto out_unlock;
 
-       ret = i915_gem_context_init(dev);
+       ret = i915_gem_context_init(dev_priv);
        if (ret)
                goto out_unlock;
 
-       ret = intel_engines_init(dev);
+       ret = intel_engines_init(dev_priv);
        if (ret)
                goto out_unlock;
 
-       ret = i915_gem_init_hw(dev);
+       ret = i915_gem_init_hw(dev_priv);
        if (ret == -EIO) {
                /* Allow engine initialisation to fail by marking the GPU as
                 * wedged. But we only want to do this where the GPU is angry,
@@ -4459,15 +4616,19 @@ int i915_gem_init(struct drm_device *dev)
 
 out_unlock:
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-       mutex_unlock(&dev->struct_mutex);
+       mutex_unlock(&dev_priv->drm.struct_mutex);
 
        return ret;
 }
 
+void i915_gem_init_mmio(struct drm_i915_private *i915)
+{
+       i915_gem_sanitize(i915);
+}
+
 void
-i915_gem_cleanup_engines(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
@@ -4483,8 +4644,9 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
        if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
            !IS_CHERRYVIEW(dev_priv))
                dev_priv->num_fence_regs = 32;
-       else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
-                IS_I945GM(dev_priv) || IS_G33(dev_priv))
+       else if (INTEL_INFO(dev_priv)->gen >= 4 ||
+                IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
+                IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
                dev_priv->num_fence_regs = 16;
        else
                dev_priv->num_fence_regs = 8;
@@ -4507,9 +4669,8 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 }
 
 int
-i915_gem_load_init(struct drm_device *dev)
+i915_gem_load_init(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
        int err = -ENOMEM;
 
        dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
@@ -4554,8 +4715,6 @@ i915_gem_load_init(struct drm_device *dev)
        init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
        init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
-       dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
-
        init_waitqueue_head(&dev_priv->pending_flip_queue);
 
        dev_priv->mm.interruptible = true;
@@ -4578,11 +4737,11 @@ err_out:
        return err;
 }
 
-void i915_gem_load_cleanup(struct drm_device *dev)
+void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
-
+       i915_gem_drain_freed_objects(dev_priv);
        WARN_ON(!llist_empty(&dev_priv->mm.free_list));
+       WARN_ON(dev_priv->mm.object_count);
 
        mutex_lock(&dev_priv->drm.struct_mutex);
        i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
@@ -4600,14 +4759,10 @@ void i915_gem_load_cleanup(struct drm_device *dev)
 
 int i915_gem_freeze(struct drm_i915_private *dev_priv)
 {
-       intel_runtime_pm_get(dev_priv);
-
        mutex_lock(&dev_priv->drm.struct_mutex);
        i915_gem_shrink_all(dev_priv);
        mutex_unlock(&dev_priv->drm.struct_mutex);
 
-       intel_runtime_pm_put(dev_priv);
-
        return 0;
 }
 
@@ -4658,7 +4813,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
         * file_priv.
         */
        spin_lock(&file_priv->mm.lock);
-       list_for_each_entry(request, &file_priv->mm.request_list, client_list)
+       list_for_each_entry(request, &file_priv->mm.request_list, client_link)
                request->file_priv = NULL;
        spin_unlock(&file_priv->mm.lock);
 
@@ -4732,7 +4887,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_device *dev,
+i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
                                 const void *data, size_t size)
 {
        struct drm_i915_gem_object *obj;
@@ -4740,7 +4895,7 @@ i915_gem_object_create_from_data(struct drm_device *dev,
        size_t bytes;
        int ret;
 
-       obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
+       obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
        if (IS_ERR(obj))
                return obj;
 
@@ -4922,3 +5077,11 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
        sg = i915_gem_object_get_sg(obj, n, &offset);
        return sg_dma_address(sg) + (offset << PAGE_SHIFT);
 }
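
The return statement above relies on i915_gem_object_get_sg() (not visible in this hunk) handing back the scatterlist entry that covers page n together with the page offset into it. A generic sketch of the same lookup over a plain sg_table, assuming page-aligned segment lengths (nth_dma_address() is a made-up name):

#include <linux/mm.h>
#include <linux/scatterlist.h>

/* Illustrative only: DMA address of the n'th PAGE_SIZE chunk of an sg_table
 * whose segments are all multiples of PAGE_SIZE.
 */
static dma_addr_t nth_dma_address(struct sg_table *st, unsigned long n)
{
	struct scatterlist *sg;
	unsigned int i;

	for_each_sg(st->sgl, sg, st->nents, i) {
		unsigned long pages = sg_dma_len(sg) >> PAGE_SHIFT;

		if (n < pages)
			return sg_dma_address(sg) + (n << PAGE_SHIFT);
		n -= pages;
	}

	return 0;	/* out of range */
}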
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/scatterlist.c"
+#include "selftests/mock_gem_device.c"
+#include "selftests/huge_gem_object.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif
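
The guarded includes above compile the selftest sources into this translation unit, which lets them exercise static helpers in i915_gem.c. Enabling them presumably just needs the matching Kconfig symbol, e.g.:

CONFIG_DRM_I915_SELFTEST=y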