git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - drivers/gpu/drm/i915/i915_gem.c
Merge branch 'akpm-current/current'
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 399aab265db3cdd669bced4452d237ad78076b98..5cf4a1998273c3cfcc494c83210c0bc572f35c2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1005,12 +1005,14 @@ out:
                if (!needs_clflush_after &&
                    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                        if (i915_gem_clflush_object(obj, obj->pin_display))
-                               i915_gem_chipset_flush(dev);
+                               needs_clflush_after = true;
                }
        }
 
        if (needs_clflush_after)
                i915_gem_chipset_flush(dev);
+       else
+               obj->cache_dirty = true;
 
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
        return ret;
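
The hunk above stops flushing the chipset inline: a clflush performed for a non-CPU write domain now just raises needs_clflush_after, and when no chipset flush turns out to be necessary the object is marked cache_dirty so the flush can happen later (the out: path added to i915_gem_object_set_cache_level further down in this diff picks that flag up). The following stand-alone C model, with hypothetical names, is only meant to make that decision flow explicit; it is not driver code.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver state touched by the hunk above. */
struct fake_obj {
	bool write_domain_is_cpu;	/* obj->base.write_domain == I915_GEM_DOMAIN_CPU */
	bool cache_dirty;		/* obj->cache_dirty */
};

/*
 * Model of the new tail of the pwrite path: returns true when a chipset
 * flush is issued right away, false when the dirt is only recorded so a
 * later cache-level change can flush it.
 */
static bool finish_cpu_write(struct fake_obj *obj, bool needs_clflush_after,
			     bool clflush_did_work)
{
	if (!needs_clflush_after && !obj->write_domain_is_cpu && clflush_did_work)
		needs_clflush_after = true;	/* promote to a chipset flush */

	if (needs_clflush_after)
		return true;			/* i915_gem_chipset_flush(dev) */

	obj->cache_dirty = true;		/* defer the flush */
	return false;
}

int main(void)
{
	struct fake_obj obj = { .write_domain_is_cpu = true };
	bool flushed = finish_cpu_write(&obj, false, false);

	printf("flush now: %d, cache_dirty: %d\n", flushed, obj.cache_dirty);
	return 0;
}
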
@@ -1711,8 +1713,8 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 
 /**
  * i915_gem_fault - fault a page into the GTT
- * vma: VMA in question
- * vmf: fault info
+ * @vma: VMA in question
+ * @vmf: fault info
  *
  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
  * from userspace.  The fault handler takes care of binding the object to
@@ -3205,7 +3207,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
                                            old_write_domain);
 }
 
-int i915_vma_unbind(struct i915_vma *vma)
+static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 {
        struct drm_i915_gem_object *obj = vma->obj;
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
@@ -3224,13 +3226,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 
        BUG_ON(obj->pages == NULL);
 
-       ret = i915_gem_object_wait_rendering(obj, false);
-       if (ret)
-               return ret;
-       /* Continue on if we fail due to EIO, the GPU is hung so we
-        * should be safe and we need to cleanup or else we might
-        * cause memory corruption through use-after-free.
-        */
+       if (wait) {
+               ret = i915_gem_object_wait_rendering(obj, false);
+               if (ret)
+                       return ret;
+       }
 
        if (i915_is_ggtt(vma->vm) &&
            vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
@@ -3275,6 +3275,16 @@ int i915_vma_unbind(struct i915_vma *vma)
        return 0;
 }
 
+int i915_vma_unbind(struct i915_vma *vma)
+{
+       return __i915_vma_unbind(vma, true);
+}
+
+int __i915_vma_unbind_no_wait(struct i915_vma *vma)
+{
+       return __i915_vma_unbind(vma, false);
+}
+
 int i915_gpu_idle(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3354,11 +3364,10 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 {
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 size, fence_size, fence_alignment, unfenced_alignment;
-       u64 start =
-               flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-       u64 end =
-               flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
+       u32 fence_alignment, unfenced_alignment;
+       u32 search_flag, alloc_flag;
+       u64 start, end;
+       u64 size, fence_size;
        struct i915_vma *vma;
        int ret;
 
@@ -3398,6 +3407,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
                size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
        }
 
+       start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
+       end = vm->total;
+       if (flags & PIN_MAPPABLE)
+               end = min_t(u64, end, dev_priv->gtt.mappable_end);
+       if (flags & PIN_ZONE_4G)
+               end = min_t(u64, end, (1ULL << 32));
+
        if (alignment == 0)
                alignment = flags & PIN_MAPPABLE ? fence_alignment :
                                                unfenced_alignment;
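
With the placement window now computed by clamping, PIN_MAPPABLE restricts the binding to the mappable aperture and the new PIN_ZONE_4G flag additionally caps the end at 4GiB, while an optional PIN_OFFSET_BIAS raises the start. The stand-alone sketch below mirrors that clamping; the flag values and sizes are made up for illustration.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative flag layout only: the low bits carry the pin flags and a
 * page-aligned offset bias may be OR'd into the upper bits of the same
 * word, mirroring how i915 packs PIN_OFFSET_BIAS into flags.
 */
#define PIN_MAPPABLE	(1ull << 0)
#define PIN_ZONE_4G	(1ull << 1)
#define PIN_OFFSET_BIAS	(1ull << 2)
#define PIN_OFFSET_MASK	(~0xfffull)

static uint64_t min_u64(uint64_t a, uint64_t b)
{
	return a < b ? a : b;
}

/* Compute the [start, end) window a binding may occupy, following the
 * clamping added to i915_gem_object_bind_to_vm() above. */
static void placement_window(uint64_t flags, uint64_t mappable_end,
			     uint64_t vm_total, uint64_t *start, uint64_t *end)
{
	*start = (flags & PIN_OFFSET_BIAS) ? (flags & PIN_OFFSET_MASK) : 0;
	*end = vm_total;
	if (flags & PIN_MAPPABLE)
		*end = min_u64(*end, mappable_end);
	if (flags & PIN_ZONE_4G)
		*end = min_u64(*end, 1ull << 32);
}

int main(void)
{
	uint64_t start, end;

	/* 48-bit ppGTT, 256MiB mappable aperture, binding must stay below 4GiB. */
	placement_window(PIN_ZONE_4G, 256ull << 20, 1ull << 48, &start, &end);
	printf("window: [%#llx, %#llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}
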
@@ -3413,7 +3429,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
         * attempt to find space.
         */
        if (size > end) {
-               DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%llu\n",
+               DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
                          ggtt_view ? ggtt_view->type : 0,
                          size,
                          flags & PIN_MAPPABLE ? "mappable" : "total",
@@ -3433,13 +3449,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
        if (IS_ERR(vma))
                goto err_unpin;
 
+       if (flags & PIN_HIGH) {
+               search_flag = DRM_MM_SEARCH_BELOW;
+               alloc_flag = DRM_MM_CREATE_TOP;
+       } else {
+               search_flag = DRM_MM_SEARCH_DEFAULT;
+               alloc_flag = DRM_MM_CREATE_DEFAULT;
+       }
+
 search_free:
        ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
                                                  size, alignment,
                                                  obj->cache_level,
                                                  start, end,
-                                                 DRM_MM_SEARCH_DEFAULT,
-                                                 DRM_MM_CREATE_DEFAULT);
+                                                 search_flag,
+                                                 alloc_flag);
        if (ret) {
                ret = i915_gem_evict_something(dev, vm, size, alignment,
                                               obj->cache_level,
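
PIN_HIGH switches the drm_mm search to top-down (DRM_MM_SEARCH_BELOW plus DRM_MM_CREATE_TOP), so such bindings land at the high end of the address space; a plausible motivation is to keep the low, CPU-mappable end of the GGTT free for bindings that genuinely need it. The toy single-hole allocator below, written purely for this note, contrasts the two policies.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* One free hole in an address space and two placement policies, modelling
 * the effect of the default bottom-up search versus the top-down search
 * selected via PIN_HIGH in the hunk above. */
struct hole {
	uint64_t start, end;
};

static uint64_t place(const struct hole *h, uint64_t size, bool top_down)
{
	if (h->end - h->start < size)
		return UINT64_MAX;		/* does not fit */
	return top_down ? h->end - size : h->start;
}

int main(void)
{
	struct hole ggtt = { .start = 0, .end = 1ull << 31 };	/* 2GiB GGTT */

	printf("bottom-up: %#llx\n",
	       (unsigned long long)place(&ggtt, 1ull << 20, false));
	printf("top-down : %#llx\n",
	       (unsigned long long)place(&ggtt, 1ull << 20, true));
	return 0;
}
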
@@ -3632,59 +3656,117 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        return 0;
 }
 
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is, if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
 {
        struct drm_device *dev = obj->base.dev;
        struct i915_vma *vma, *next;
-       int ret;
+       bool bound = false;
+       int ret = 0;
 
        if (obj->cache_level == cache_level)
-               return 0;
-
-       if (i915_gem_obj_is_pinned(obj)) {
-               DRM_DEBUG("can not change the cache level of pinned objects\n");
-               return -EBUSY;
-       }
+               goto out;
 
+       /* Inspect the list of currently bound VMA and unbind any that would
+        * be invalid given the new cache-level. This is principally to
+        * catch the issue of the CS prefetch crossing page boundaries and
+        * reading an invalid PTE on older architectures.
+        */
        list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+               if (!drm_mm_node_allocated(&vma->node))
+                       continue;
+
+               if (vma->pin_count) {
+                       DRM_DEBUG("can not change the cache level of pinned objects\n");
+                       return -EBUSY;
+               }
+
                if (!i915_gem_valid_gtt_space(vma, cache_level)) {
                        ret = i915_vma_unbind(vma);
                        if (ret)
                                return ret;
-               }
+               } else
+                       bound = true;
        }
 
-       if (i915_gem_obj_bound_any(obj)) {
+       /* We can reuse the existing drm_mm nodes but need to change the
+        * cache-level on the PTE. We could simply unbind them all and
+        * rebind with the correct cache-level on next use. However since
+        * we already have a valid slot, dma mapping, pages etc, we may as well
+        * rewrite the PTE in the belief that doing so tramples upon less
+        * state and so involves less work.
+        */
+       if (bound) {
+               /* Before we change the PTE, the GPU must not be accessing it.
+                * If we wait upon the object, we know that all the bound
+                * VMA are no longer active.
+                */
                ret = i915_gem_object_wait_rendering(obj, false);
                if (ret)
                        return ret;
 
-               i915_gem_object_finish_gtt(obj);
-
-               /* Before SandyBridge, you could not use tiling or fence
-                * registers with snooped memory, so relinquish any fences
-                * currently pointing to our region in the aperture.
-                */
-               if (INTEL_INFO(dev)->gen < 6) {
+               if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+                       /* Access to snoopable pages through the GTT is
+                        * incoherent and on some machines causes a hard
+                        * lockup. Relinquish the CPU mmapping to force
+                        * userspace to refault in the pages and we can
+                        * then double check if the GTT mapping is still
+                        * valid for that pointer access.
+                        */
+                       i915_gem_release_mmap(obj);
+
+                       /* As we no longer need a fence for GTT access,
+                        * we can relinquish it now (and so prevent having
+                        * to steal a fence from someone else on the next
+                        * fence request). Note GPU activity would have
+                        * dropped the fence as all snoopable access is
+                        * supposed to be linear.
+                        */
                        ret = i915_gem_object_put_fence(obj);
                        if (ret)
                                return ret;
+               } else {
+                       /* We either have incoherent backing store and
+                        * so no GTT access or the architecture is fully
+                        * coherent. In such cases, existing GTT mmaps
+                        * ignore the cache bit in the PTE and we can
+                        * rewrite it without confusing the GPU or having
+                        * to force userspace to fault back in its mmaps.
+                        */
                }
 
-               list_for_each_entry(vma, &obj->vma_list, vma_link)
-                       if (drm_mm_node_allocated(&vma->node)) {
-                               ret = i915_vma_bind(vma, cache_level,
-                                                   PIN_UPDATE);
-                               if (ret)
-                                       return ret;
-                       }
+               list_for_each_entry(vma, &obj->vma_list, vma_link) {
+                       if (!drm_mm_node_allocated(&vma->node))
+                               continue;
+
+                       ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+                       if (ret)
+                               return ret;
+               }
        }
 
        list_for_each_entry(vma, &obj->vma_list, vma_link)
                vma->node.color = cache_level;
        obj->cache_level = cache_level;
 
+out:
+       /* Flush the dirty CPU caches to the backing storage so that the
+        * object is now coherent at its new cache level (with respect
+        * to the access domain).
+        */
        if (obj->cache_dirty &&
            obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
            cpu_write_needs_clflush(obj)) {
@@ -3737,6 +3819,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
+               /*
+                * Due to a HW issue on BXT A stepping, GPU stores via a
+                * snooped mapping may leave stale data in a corresponding CPU
+                * cacheline, whereas normally such cachelines would get
+                * invalidated.
+                */
+               if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
+                       return -ENODEV;
+
                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
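
Returning -ENODEV here means userspace asking for I915_CACHING_CACHED on an affected BXT A-stepping part has to cope with the refusal. A hedged sketch of how a caller of the real DRM_IOCTL_I915_GEM_SET_CACHING ioctl might fall back to uncached is shown below; fd and handle are assumed to be an open i915 DRM fd and a valid GEM handle, and a production caller would normally go through drmIoctl() to get EINTR retries.

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* from libdrm; provides DRM_IOCTL_I915_GEM_SET_CACHING */

/*
 * Sketch only: request LLC caching for a GEM object and fall back to
 * uncached if the kernel refuses with -ENODEV, as it now does for
 * I915_CACHING_CACHED on BXT A stepping.
 */
static int set_caching_with_fallback(int fd, uint32_t handle)
{
	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = I915_CACHING_CACHED,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0)
		return 0;
	if (errno != ENODEV)
		return -errno;

	/* Snooping is unusable on this stepping; settle for uncached. */
	arg.caching = I915_CACHING_NONE;
	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0 ? 0 : -errno;
}
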
@@ -4010,15 +4101,13 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
                        return -EBUSY;
 
                if (i915_vma_misplaced(vma, alignment, flags)) {
-                       unsigned long offset;
-                       offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) :
-                                            i915_gem_obj_offset(obj, vm);
                        WARN(vma->pin_count,
                             "bo is already pinned in %s with incorrect alignment:"
-                            " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
+                            " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
                             " obj->map_and_fenceable=%d\n",
                             ggtt_view ? "ggtt" : "ppgtt",
-                            offset,
+                            upper_32_bits(vma->node.start),
+                            lower_32_bits(vma->node.start),
                             alignment,
                             !!(flags & PIN_MAPPABLE),
                             obj->map_and_fenceable);
@@ -4525,22 +4614,6 @@ void i915_gem_init_swizzling(struct drm_device *dev)
                BUG();
 }
 
-static bool
-intel_enable_blt(struct drm_device *dev)
-{
-       if (!HAS_BLT(dev))
-               return false;
-
-       /* The blitter was dysfunctional on early prototypes */
-       if (IS_GEN6(dev) && dev->pdev->revision < 8) {
-               DRM_INFO("BLT not supported on this pre-production hardware;"
-                        " graphics performance will be degraded.\n");
-               return false;
-       }
-
-       return true;
-}
-
 static void init_unused_ring(struct drm_device *dev, u32 base)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4583,7 +4656,7 @@ int i915_gem_init_rings(struct drm_device *dev)
                        goto cleanup_render_ring;
        }
 
-       if (intel_enable_blt(dev)) {
+       if (HAS_BLT(dev)) {
                ret = intel_init_blt_ring_buffer(dev);
                if (ret)
                        goto cleanup_bsd_ring;
@@ -4601,14 +4674,8 @@ int i915_gem_init_rings(struct drm_device *dev)
                        goto cleanup_vebox_ring;
        }
 
-       ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-       if (ret)
-               goto cleanup_bsd2_ring;
-
        return 0;
 
-cleanup_bsd2_ring:
-       intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
 cleanup_vebox_ring:
        intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
 cleanup_blt_ring:
@@ -4678,6 +4745,33 @@ i915_gem_init_hw(struct drm_device *dev)
                        goto out;
        }
 
+       /* We can't enable contexts until all firmware is loaded */
+       if (HAS_GUC_UCODE(dev)) {
+               ret = intel_guc_ucode_load(dev);
+               if (ret) {
+                       /*
+                        * If we got an error and GuC submission is enabled, map
+                        * the error to -EIO so the GPU will be declared wedged.
+                        * OTOH, if we didn't intend to use the GuC anyway, just
+                        * discard the error and carry on.
+                        */
+                       DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
+                                 i915.enable_guc_submission ? "" :
+                                 " (ignored)");
+                       ret = i915.enable_guc_submission ? -EIO : 0;
+                       if (ret)
+                               goto out;
+               }
+       }
+
+       /*
+        * Increment the next seqno by 0x100 so we have a visible break
+        * on re-initialisation
+        */
+       ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
+       if (ret)
+               goto out;
+
        /* Now it is safe to go back round and do everything else: */
        for_each_ring(ring, dev_priv, i) {
                struct drm_i915_gem_request *req;
@@ -4815,18 +4909,6 @@ init_ring_lists(struct intel_engine_cs *ring)
        INIT_LIST_HEAD(&ring->request_list);
 }
 
-void i915_init_vm(struct drm_i915_private *dev_priv,
-                 struct i915_address_space *vm)
-{
-       if (!i915_is_ggtt(vm))
-               drm_mm_init(&vm->mm, vm->start, vm->total);
-       vm->dev = dev_priv->dev;
-       INIT_LIST_HEAD(&vm->active_list);
-       INIT_LIST_HEAD(&vm->inactive_list);
-       INIT_LIST_HEAD(&vm->global_link);
-       list_add_tail(&vm->global_link, &dev_priv->vm_list);
-}
-
 void
 i915_gem_load(struct drm_device *dev)
 {
@@ -4850,8 +4932,6 @@ i915_gem_load(struct drm_device *dev)
                                  NULL);
 
        INIT_LIST_HEAD(&dev_priv->vm_list);
-       i915_init_vm(dev_priv, &dev_priv->gtt.base);
-
        INIT_LIST_HEAD(&dev_priv->context_list);
        INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
        INIT_LIST_HEAD(&dev_priv->mm.bound_list);
@@ -4879,6 +4959,14 @@ i915_gem_load(struct drm_device *dev)
                dev_priv->num_fence_regs =
                                I915_READ(vgtif_reg(avail_rs.fence_num));
 
+       /*
+        * Set initial sequence number for requests.
+        * Using this number allows the wraparound to happen early,
+        * catching any obvious problems.
+        */
+       dev_priv->next_seqno = ((u32)~0 - 0x1100);
+       dev_priv->last_seqno = ((u32)~0 - 0x1101);
+
        /* Initialize fence registers to zero */
        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
        i915_gem_restore_fences(dev);
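
Seeding next_seqno just below the 32-bit limit (and bumping it by 0x100 on every re-initialisation, as added to i915_gem_init_hw above) makes the sequence counter wrap shortly after load, so comparisons that are not wrap-safe fail immediately rather than after months of uptime. The stand-alone snippet below illustrates the signed-difference idiom the driver uses for such comparisons (i915_seqno_passed follows the same pattern); the starting value matches this hunk.

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a has passed b" check: the signed-difference idiom used by
 * i915_seqno_passed(). */
static int seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

int main(void)
{
	uint32_t seqno = (uint32_t)~0 - 0x1100;	/* initial value from this patch */
	uint32_t before_wrap = seqno + 0x100;	/* still below 2^32 - 1 */
	uint32_t after_wrap = seqno + 0x2000;	/* has wrapped past zero */

	printf("wrap-safe: after_wrap=0x%08x passed before_wrap=0x%08x: %d\n",
	       after_wrap, before_wrap, seqno_passed(after_wrap, before_wrap));
	printf("naive compare (after_wrap > before_wrap): %d\n",
	       after_wrap > before_wrap);
	return 0;
}
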
@@ -4948,9 +5036,9 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 
 /**
  * i915_gem_track_fb - update frontbuffer tracking
- * old: current GEM buffer for the frontbuffer slots
- * new: new GEM buffer for the frontbuffer slots
- * frontbuffer_bits: bitmask of frontbuffer slots
+ * @old: current GEM buffer for the frontbuffer slots
+ * @new: new GEM buffer for the frontbuffer slots
+ * @frontbuffer_bits: bitmask of frontbuffer slots
  *
  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
  * from @old and setting them in @new. Both @old and @new can be NULL.
@@ -4973,9 +5061,8 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 }
 
 /* All the new VM stuff */
-unsigned long
-i915_gem_obj_offset(struct drm_i915_gem_object *o,
-                   struct i915_address_space *vm)
+u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
+                       struct i915_address_space *vm)
 {
        struct drm_i915_private *dev_priv = o->base.dev->dev_private;
        struct i915_vma *vma;
@@ -4995,9 +5082,8 @@ i915_gem_obj_offset(struct drm_i915_gem_object *o,
        return -1;
 }
 
-unsigned long
-i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
-                             const struct i915_ggtt_view *view)
+u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
+                                 const struct i915_ggtt_view *view)
 {
        struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
        struct i915_vma *vma;
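
The final hunks widen i915_gem_obj_offset() and i915_gem_obj_ggtt_offset_view() from unsigned long to u64: on a 32-bit kernel unsigned long is only 32 bits, so an offset placed above 4GiB (now reachable with large ppGTT and the PIN_HIGH/PIN_ZONE_4G placement added earlier) would be silently truncated, which is also why the WARN earlier in the diff prints the node start as upper/lower 32-bit halves. The small stand-alone program below, independent of the driver, shows the truncation.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A GPU virtual address above 4GiB, as a 48-bit ppGTT can hand out. */
	uint64_t offset = (1ull << 32) + 0x1000;

	/* What a 32-bit "unsigned long" would have kept of it. */
	uint32_t truncated = (uint32_t)offset;

	printf("u64 offset      : 0x%" PRIx64 "\n", offset);
	printf("32-bit truncated: 0x%" PRIx32 "\n", truncated);
	printf("upper/lower     : %08" PRIx32 " %08" PRIx32 "\n",
	       (uint32_t)(offset >> 32), (uint32_t)(offset & 0xffffffff));
	return 0;
}
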