git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge tag 'drm-intel-fixes-2017-06-20' of git://anongit.freedesktop.org/git/drm-intel...
author Dave Airlie <airlied@redhat.com>
Wed, 21 Jun 2017 01:22:34 +0000 (11:22 +1000)
committer Dave Airlie <airlied@redhat.com>
Wed, 21 Jun 2017 01:22:34 +0000 (11:22 +1000)
drm/i915 fixes for v4.12-rc7

* tag 'drm-intel-fixes-2017-06-20' of git://anongit.freedesktop.org/git/drm-intel:
  drm/i915: Don't enable backlight at setup time.
  drm/i915: Plumb the correct acquire ctx into intel_crtc_disable_noatomic()
  drm/i915: Fix deadlock with the pipe A quirk during resume
  drm/i915: Remove __GFP_NORETRY from our buffer allocator
  drm/i915: Encourage our shrinker more when our shmemfs allocations fails
  drm/i915: Differentiate between sw write location into ring and last hw read

drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_guc_submission.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp_aux_backlight.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index 462031cbd77f714b23a3b7645039c0d8dba71f40..615f0a855222f630d07311c92dce17d3bd371298 100644 (file)
@@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        struct page *page;
        unsigned long last_pfn = 0;     /* suppress gcc warning */
        unsigned int max_segment;
+       gfp_t noreclaim;
        int ret;
-       gfp_t gfp;
 
        /* Assert that the object is not currently in any GPU domain. As it
         * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2315,22 +2315,31 @@ rebuild_st:
         * Fail silently without starting the shrinker
         */
        mapping = obj->base.filp->f_mapping;
-       gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
-       gfp |= __GFP_NORETRY | __GFP_NOWARN;
+       noreclaim = mapping_gfp_constraint(mapping,
+                                          ~(__GFP_IO | __GFP_RECLAIM));
+       noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
        sg = st->sgl;
        st->nents = 0;
        for (i = 0; i < page_count; i++) {
-               page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-               if (unlikely(IS_ERR(page))) {
-                       i915_gem_shrink(dev_priv,
-                                       page_count,
-                                       I915_SHRINK_BOUND |
-                                       I915_SHRINK_UNBOUND |
-                                       I915_SHRINK_PURGEABLE);
+               const unsigned int shrink[] = {
+                       I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+                       0,
+               }, *s = shrink;
+               gfp_t gfp = noreclaim;
+
+               do {
                        page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-               }
-               if (unlikely(IS_ERR(page))) {
-                       gfp_t reclaim;
+                       if (likely(!IS_ERR(page)))
+                               break;
+
+                       if (!*s) {
+                               ret = PTR_ERR(page);
+                               goto err_sg;
+                       }
+
+                       i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+                       cond_resched();
 
                        /* We've tried hard to allocate the memory by reaping
                         * our own buffer, now let the real VM do its job and
@@ -2340,15 +2349,26 @@ rebuild_st:
                         * defer the oom here by reporting the ENOMEM back
                         * to userspace.
                         */
-                       reclaim = mapping_gfp_mask(mapping);
-                       reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
-                       page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
-                       if (IS_ERR(page)) {
-                               ret = PTR_ERR(page);
-                               goto err_sg;
+                       if (!*s) {
+                               /* reclaim and warn, but no oom */
+                               gfp = mapping_gfp_mask(mapping);
+
+                               /* Our bo are always dirty and so we require
+                                * kswapd to reclaim our pages (direct reclaim
+                                * does not effectively begin pageout of our
+                                * buffers on its own). However, direct reclaim
+                                * only waits for kswapd when under allocation
+                                * congestion. So as a result __GFP_RECLAIM is
+                                * unreliable and fails to actually reclaim our
+                                * dirty pages -- unless you try over and over
+                                * again with !__GFP_NORETRY. However, we still
+                                * want to fail this allocation rather than
+                                * trigger the out-of-memory killer and for
+                                * this we want the future __GFP_MAYFAIL.
+                                */
                        }
-               }
+               } while (1);
+
                if (!i ||
                    sg->length >= max_segment ||
                    page_to_pfn(page) != last_pfn + 1) {
@@ -4222,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 
        mapping = obj->base.filp->f_mapping;
        mapping_set_gfp_mask(mapping, mask);
+       GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
        i915_gem_object_init(obj, &i915_gem_object_ops);
 
index 5ddbc94997751adf5c9f04f7dd4a37a74d70de24..a74d0ac737cbeb7f9b9c5e93ea712a396e3c09d5 100644 (file)
@@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         * GPU processing the request, we never over-estimate the
         * position of the head.
         */
-       req->head = req->ring->tail;
+       req->head = req->ring->emit;
 
        /* Check that we didn't interrupt ourselves with a new request */
        GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
index 1642fff9cf135d5edbe85864d1b327d59002c026..ab5140ba108ddcb2c9c5382cc6439223704f9fda 100644 (file)
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
        GEM_BUG_ON(freespace < wqi_size);
 
        /* The GuC firmware wants the tail index in QWords, not bytes */
-       tail = rq->tail;
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       tail >>= 3;
+       tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
        GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
        /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
index 96b0b01677e26b22f382868f4b8b4c6dd738a4b3..9106ea32b048cac4783ae316d7cc198a4bf8ae88 100644 (file)
@@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc,
 static void skylake_pfit_enable(struct intel_crtc *crtc);
 static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
 static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+                                        struct drm_modeset_acquire_ctx *ctx);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 
 struct intel_limit {
@@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev,
        struct drm_crtc *crtc;
        int i, ret;
 
-       intel_modeset_setup_hw_state(dev);
+       intel_modeset_setup_hw_state(dev, ctx);
        i915_redisable_vga(to_i915(dev));
 
        if (!state)
@@ -5825,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
                intel_update_watermarks(intel_crtc);
 }
 
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+                                       struct drm_modeset_acquire_ctx *ctx)
 {
        struct intel_encoder *encoder;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5855,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
                return;
        }
 
-       state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+       state->acquire_ctx = ctx;
 
        /* Everything's already locked, -EDEADLK can't happen. */
        crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
@@ -15030,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev)
        intel_setup_outputs(dev_priv);
 
        drm_modeset_lock_all(dev);
-       intel_modeset_setup_hw_state(dev);
+       intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
        drm_modeset_unlock_all(dev);
 
        for_each_intel_crtc(dev, crtc) {
@@ -15067,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev)
        return 0;
 }
 
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+                               struct drm_modeset_acquire_ctx *ctx)
 {
        struct intel_connector *connector;
        struct drm_connector_list_iter conn_iter;
        struct drm_connector *crt = NULL;
        struct intel_load_detect_pipe load_detect_temp;
-       struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
        int ret;
 
        /* We can't just switch on the pipe A, we need to set things up with a
@@ -15145,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
                (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
 }
 
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+                               struct drm_modeset_acquire_ctx *ctx)
 {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
@@ -15191,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                plane = crtc->plane;
                crtc->base.primary->state->visible = true;
                crtc->plane = !plane;
-               intel_crtc_disable_noatomic(&crtc->base);
+               intel_crtc_disable_noatomic(&crtc->base, ctx);
                crtc->plane = plane;
        }
 
@@ -15201,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                 * resume. Force-enable the pipe to fix this, the update_dpms
                 * call below we restore the pipe to the right state, but leave
                 * the required bits on. */
-               intel_enable_pipe_a(dev);
+               intel_enable_pipe_a(dev, ctx);
        }
 
        /* Adjust the state of the output pipe according to whether we
         * have active connectors/encoders. */
        if (crtc->active && !intel_crtc_has_encoders(crtc))
-               intel_crtc_disable_noatomic(&crtc->base);
+               intel_crtc_disable_noatomic(&crtc->base, ctx);
 
        if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
                /*
@@ -15505,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
  * and sanitizes it to the current state
  */
 static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+                            struct drm_modeset_acquire_ctx *ctx)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
        enum pipe pipe;
@@ -15525,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
        for_each_pipe(dev_priv, pipe) {
                crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 
-               intel_sanitize_crtc(crtc);
+               intel_sanitize_crtc(crtc, ctx);
                intel_dump_pipe_config(crtc, crtc->config,
                                       "[setup_hw_state]");
        }
index 6532e226db29b63da766a8571de231de4f7261f6..40ba3134545ef7e339c5bfe347501eeb7715dac0 100644 (file)
@@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
        struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
        struct intel_panel *panel = &connector->panel;
 
-       intel_dp_aux_enable_backlight(connector);
-
        if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
                panel->backlight.max = 0xFFFF;
        else
index dac4e003c1f317ec402110132bad0c3a734bf52a..62f44d3e7c43c0d90df093050d5af6d3d68fe3a3 100644 (file)
@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
                rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
        u32 *reg_state = ce->lrc_reg_state;
 
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       reg_state[CTX_RING_TAIL+1] = rq->tail;
+       reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
        /* True 32b PPGTT with dynamic page allocation: update PDP
         * registers and point the unallocated PDPs to scratch page.
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
                        ce->state->obj->mm.dirty = true;
                        i915_gem_object_unpin_map(ce->state->obj);
 
-                       ce->ring->head = ce->ring->tail = 0;
-                       intel_ring_update_space(ce->ring);
+                       intel_ring_reset(ce->ring, 0);
                }
        }
 }
index 66a2b8b83972691d04f2737337e7ea6cf6a72851..513a0f4b469b32c9d0ac2e87c089bb6f2e4907ba 100644 (file)
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-       ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+       ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }
 
 static int
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
        i915_gem_request_submit(request);
 
-       assert_ring_tail_valid(request->ring, request->tail);
-       I915_WRITE_TAIL(request->engine, request->tail);
+       I915_WRITE_TAIL(request->engine,
+                       intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1316,11 +1316,23 @@ err:
        return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+       GEM_BUG_ON(!list_empty(&ring->request_list));
+       ring->tail = tail;
+       ring->head = tail;
+       ring->emit = tail;
+       intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
        GEM_BUG_ON(!ring->vma);
        GEM_BUG_ON(!ring->vaddr);
 
+       /* Discard any unused bytes beyond that submitted to hw. */
+       intel_ring_reset(ring, ring->tail);
+
        if (i915_vma_is_map_and_fenceable(ring->vma))
                i915_vma_unpin_iomap(ring->vma);
        else
@@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
+       /* Restart from the beginning of the rings for convenience */
        for_each_engine(engine, dev_priv, id)
-               engine->buffer->head = engine->buffer->tail;
+               intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
                unsigned space;
 
                /* Would completion of this request free enough space? */
-               space = __intel_ring_space(target->postfix, ring->tail,
+               space = __intel_ring_space(target->postfix, ring->emit,
                                           ring->size);
                if (space >= bytes)
                        break;
@@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
        struct intel_ring *ring = req->ring;
-       int remain_actual = ring->size - ring->tail;
-       int remain_usable = ring->effective_size - ring->tail;
+       int remain_actual = ring->size - ring->emit;
+       int remain_usable = ring->effective_size - ring->emit;
        int bytes = num_dwords * sizeof(u32);
        int total_bytes, wait_bytes;
        bool need_wrap = false;
@@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 
        if (unlikely(need_wrap)) {
                GEM_BUG_ON(remain_actual > ring->space);
-               GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+               GEM_BUG_ON(ring->emit + remain_actual > ring->size);
 
                /* Fill the tail with MI_NOOP */
-               memset(ring->vaddr + ring->tail, 0, remain_actual);
-               ring->tail = 0;
+               memset(ring->vaddr + ring->emit, 0, remain_actual);
+               ring->emit = 0;
                ring->space -= remain_actual;
        }
 
-       GEM_BUG_ON(ring->tail > ring->size - bytes);
-       cs = ring->vaddr + ring->tail;
-       ring->tail += bytes;
+       GEM_BUG_ON(ring->emit > ring->size - bytes);
+       cs = ring->vaddr + ring->emit;
+       ring->emit += bytes;
        ring->space -= bytes;
        GEM_BUG_ON(ring->space < 0);
 
@@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
        int num_dwords =
-               (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+               (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
        u32 *cs;
 
        if (num_dwords == 0)
index a82a0807f64dbd0624728fe3c65215abe3647565..f7144fe0961347826c62e620af879bb4db9f0d77 100644 (file)
@@ -145,6 +145,7 @@ struct intel_ring {
 
        u32 head;
        u32 tail;
+       u32 emit;
 
        int space;
        int size;
@@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
         * reserved for the command packet (i.e. the value passed to
         * intel_ring_begin()).
         */
-       GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+       GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
        GEM_BUG_ON(tail >= ring->size);
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+       /* Whilst writes to the tail are strictly ordered, there is no
+        * serialisation between readers and the writers. The tail may be
+        * read by i915_gem_request_retire() just as it is being updated
+        * by execlists, as although the breadcrumb is complete, the context
+        * switch hasn't been seen.
+        */
+       assert_ring_tail_valid(ring, tail);
+       ring->tail = tail;
+       return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);