drm/i915: Split execlist priority queue into rbtree + linked list
author     Chris Wilson <chris@chris-wilson.co.uk>
           Wed, 17 May 2017 12:10:03 +0000 (13:10 +0100)
committer  Chris Wilson <chris@chris-wilson.co.uk>
           Wed, 17 May 2017 12:38:09 +0000 (13:38 +0100)
All the requests at the same priority are executed in FIFO order. They
do not need to be stored in the rbtree themselves, as they form a simple
list within a level. If we move the requests at one priority into a list,
we can then reduce the rbtree to the set of priorities. This should keep
the height of the rbtree small, as the number of active priorities cannot
exceed the number of active requests and should typically be only a few.
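
For reference, the shape this patch introduces (taken from the
intel_ringbuffer.h hunk below) is one small node per active priority,
with the requests at that level kept on a plain list:

    struct i915_priolist {
            struct rb_node node;        /* keyed by priority in execlist_queue */
            struct list_head requests;  /* requests at this level, FIFO order */
            int priority;
    };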

Currently, we have ~2k possible priority levels, and that number may
increase to allow even finer-grained selection. Allocating those in
advance seems a waste (and may be impossible), so we opt to allocate a
level upon first use and free it once its requests are depleted. To avoid
the possibility of an allocation failure causing us to lose a request,
we preallocate the default priority (0) and bump any request to that
priority if we fail to allocate the appropriate plist for it. Having a
request (that is ready to run, so not leading to corruption) execute
out-of-order is better than leaking the request (and its dependency
tree) entirely.
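
Condensed from insert_request() in the intel_lrc.c hunk below (the full
version also re-walks the tree and disables further priority scheduling,
see v3), the fallback looks roughly like this:

    if (prio == I915_PRIORITY_NORMAL) {
            p = &engine->default_priolist;  /* embedded, never allocated */
    } else {
            p = kmalloc(sizeof(*p), GFP_ATOMIC);
            if (unlikely(!p)) {
                    /* Bump to the preallocated default level rather than
                     * drop the request; it may now execute out of order.
                     */
                    prio = I915_PRIORITY_NORMAL;
                    p = &engine->default_priolist;
            }
    }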

There should be a benefit to reducing execlists_dequeue() to principally
using a simple list (and to reducing the frequency of both rbtree
iteration and rebalancing on erase), but for typical workloads request
coalescing should be small enough that we don't notice any change. The
main gain is from improving PI (priority inheritance) calls to schedule,
and the explicit list within a level should make request unwinding
simpler (we just need to insert at the head of the list rather than the
tail, without making the rbtree search more complicated).
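
A hypothetical sketch of that unwind path (not part of this patch;
unwind_request is an illustrative name only): an already-dequeued
request would simply be put back at the head of its level.

    /* Illustration only: re-queue a dequeued request ahead of anything
     * newer at the same priority by inserting at the list head.
     */
    static void unwind_request(struct i915_priolist *p,
                               struct drm_i915_gem_request *rq)
    {
            list_add(&rq->priotree.link, &p->requests); /* head, not tail */
    }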

v2: Avoid use-after-free when deleting a depleted priolist

v3: Michał found the solution for handling the allocation failure
gracefully. If we disable all priority scheduling following the
allocation failure, those requests will be executed in FIFO order, and
we will ensure that this request and its dependencies stay in strict
FIFO (even though it doesn't realise it is only a single list). Normal
scheduling is restored once we know the device is idle, until the next
failure!
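
In the code below this is the engine->no_priolist flag: set on
allocation failure in insert_request(), forcing every subsequent request
onto the default level, and cleared again in intel_engines_mark_idle():

    /* In insert_request(), before the rbtree walk: */
    if (unlikely(engine->no_priolist))
            prio = I915_PRIORITY_NORMAL;

    /* In intel_engines_mark_idle(), once the GPU is idle: */
    engine->no_priolist = false;
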
Suggested-by: Michał Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-8-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_gem_request.h
drivers/gpu/drm/i915/i915_guc_submission.c
drivers/gpu/drm/i915/i915_utils.h
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index e08ac708e547a61b401472dfa121fdb5fc19a0c8..8abb93994c488aa9985cc9bac8fef1673e8138ef 100644 (file)
@@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
                if (i915.enable_execlists) {
                        u32 ptr, read, write;
-                       struct rb_node *rb;
                        unsigned int idx;
 
                        seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
@@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                        rcu_read_unlock();
 
                        spin_lock_irq(&engine->timeline->lock);
-                       for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
-                               rq = rb_entry(rb, typeof(*rq), priotree.node);
-                               print_request(m, rq, "\t\tQ ");
+                       for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
+                               struct i915_priolist *p =
+                                       rb_entry(rb, typeof(*p), node);
+
+                               list_for_each_entry(rq, &p->requests,
+                                                   priotree.link)
+                                       print_request(m, rq, "\t\tQ ");
                        }
                        spin_unlock_irq(&engine->timeline->lock);
                } else if (INTEL_GEN(dev_priv) > 6) {
index 75d7575b81f4170acd27805e62c0abf5c04058d0..3d9161c8c1a1e5fbdca7da2b7af296c5f9e5a98b 100644 (file)
@@ -3155,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
        struct drm_i915_private *dev_priv =
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
        struct drm_device *dev = &dev_priv->drm;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
        bool rearm_hangcheck;
 
        if (!READ_ONCE(dev_priv->gt.awake))
@@ -3194,10 +3192,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
        if (wait_for(intel_engines_are_idle(dev_priv), 10))
                DRM_ERROR("Timeout waiting for engines to idle\n");
 
-       for_each_engine(engine, dev_priv, id) {
-               intel_engine_disarm_breadcrumbs(engine);
-               i915_gem_batch_pool_fini(&engine->batch_pool);
-       }
+       intel_engines_mark_idle(dev_priv);
        i915_gem_timelines_mark_idle(dev_priv);
 
        GEM_BUG_ON(!dev_priv->gt.awake);
index 10361c7e3b377e6d8f793d0b5f14b81ee0b62aad..1ccf2522cdfd6d93f6ef23e61c6f913536052191 100644 (file)
@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
 {
        struct i915_dependency *dep, *next;
 
-       GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+       GEM_BUG_ON(!list_empty(&pt->link));
 
        /* Everyone we depended upon (the fences we wait to be signaled)
         * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
 {
        INIT_LIST_HEAD(&pt->signalers_list);
        INIT_LIST_HEAD(&pt->waiters_list);
-       RB_CLEAR_NODE(&pt->node);
+       INIT_LIST_HEAD(&pt->link);
        pt->priority = INT_MIN;
 }
 
index 0ecfc5e2d707280563838df6010191b75ab9e64b..8c508bd9088eab5dfd1cab9356c2d1822c664dbd 100644 (file)
@@ -67,7 +67,7 @@ struct i915_dependency {
 struct i915_priotree {
        struct list_head signalers_list; /* those before us, we depend upon */
        struct list_head waiters_list; /* those after us, they depend upon us */
-       struct rb_node node;
+       struct list_head link;
        int priority;
 #define I915_PRIORITY_MAX 1024
 #define I915_PRIORITY_NORMAL 0
index 014cbd1a841e16d6725adaa4105f2ac6adbe1ce3..3b9cdb0907c21ea19a7aa7610f4519b1cfacf786 100644 (file)
@@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 
        spin_lock_irq(&engine->timeline->lock);
        rb = engine->execlist_first;
+       GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
        while (rb) {
-               struct drm_i915_gem_request *rq =
-                       rb_entry(rb, typeof(*rq), priotree.node);
-
-               if (last && rq->ctx != last->ctx) {
-                       if (port != engine->execlist_port)
-                               break;
-
-                       port_assign(port, last);
-                       port++;
+               struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+               struct drm_i915_gem_request *rq, *rn;
+
+               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+                       if (last && rq->ctx != last->ctx) {
+                               if (port != engine->execlist_port) {
+                                       __list_del_many(&p->requests,
+                                                       &rq->priotree.link);
+                                       goto done;
+                               }
+
+                               port_assign(port, last);
+                               port++;
+                       }
+
+                       INIT_LIST_HEAD(&rq->priotree.link);
+                       rq->priotree.priority = INT_MAX;
+
+                       i915_guc_submit(rq);
+                       trace_i915_gem_request_in(rq, port_index(port, engine));
+                       last = rq;
+                       submit = true;
                }
 
                rb = rb_next(rb);
-               rb_erase(&rq->priotree.node, &engine->execlist_queue);
-               RB_CLEAR_NODE(&rq->priotree.node);
-               rq->priotree.priority = INT_MAX;
-
-               i915_guc_submit(rq);
-               trace_i915_gem_request_in(rq, port_index(port, engine));
-               last = rq;
-               submit = true;
+               rb_erase(&p->node, &engine->execlist_queue);
+               INIT_LIST_HEAD(&p->requests);
+               if (p->priority != I915_PRIORITY_NORMAL)
+                       kfree(p);
        }
-       if (submit) {
+done:
+       engine->execlist_first = rb;
+       if (submit)
                port_assign(port, last);
-               engine->execlist_first = rb;
-       }
        spin_unlock_irq(&engine->timeline->lock);
 
        return submit;
index d9df23795f9a2726d7f256c148dbcb2202ab0d21..16ecd1ab108d48f6d7b036801a32f40c2e9ccf5d 100644 (file)
        __idx;                                                          \
 })
 
+#include <linux/list.h>
+
+static inline void __list_del_many(struct list_head *head,
+                                  struct list_head *first)
+{
+       first->prev = head;
+       WRITE_ONCE(head->next, first);
+}
+
 #endif /* !__I915_UTILS_H */
index e312decccaf1273d748614513115f64073e1600c..413bfd8d4bf489ec73d4ce7677a223bf4465dca3 100644 (file)
@@ -1274,6 +1274,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
                engine->set_default_submission(engine);
 }
 
+void intel_engines_mark_idle(struct drm_i915_private *i915)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       for_each_engine(engine, i915, id) {
+               intel_engine_disarm_breadcrumbs(engine);
+               i915_gem_batch_pool_fini(&engine->batch_pool);
+               engine->no_priolist = false;
+       }
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
index 53ec0d5713adc377f1360ccae27ab0252cfc9df4..626db6185a21e913025d59a7f8a925c627d3b750 100644 (file)
@@ -436,57 +436,75 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
        spin_lock_irq(&engine->timeline->lock);
        rb = engine->execlist_first;
+       GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
        while (rb) {
-               struct drm_i915_gem_request *cursor =
-                       rb_entry(rb, typeof(*cursor), priotree.node);
-
-               /* Can we combine this request with the current port? It has to
-                * be the same context/ringbuffer and not have any exceptions
-                * (e.g. GVT saying never to combine contexts).
-                *
-                * If we can combine the requests, we can execute both by
-                * updating the RING_TAIL to point to the end of the second
-                * request, and so we never need to tell the hardware about
-                * the first.
-                */
-               if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
-                       /* If we are on the second port and cannot combine
-                        * this request with the last, then we are done.
-                        */
-                       if (port != engine->execlist_port)
-                               break;
-
-                       /* If GVT overrides us we only ever submit port[0],
-                        * leaving port[1] empty. Note that we also have
-                        * to be careful that we don't queue the same
-                        * context (even though a different request) to
-                        * the second port.
+               struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+               struct drm_i915_gem_request *rq, *rn;
+
+               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+                       /*
+                        * Can we combine this request with the current port?
+                        * It has to be the same context/ringbuffer and not
+                        * have any exceptions (e.g. GVT saying never to
+                        * combine contexts).
+                        *
+                        * If we can combine the requests, we can execute both
+                        * by updating the RING_TAIL to point to the end of the
+                        * second request, and so we never need to tell the
+                        * hardware about the first.
                         */
-                       if (ctx_single_port_submission(last->ctx) ||
-                           ctx_single_port_submission(cursor->ctx))
-                               break;
+                       if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+                               /*
+                                * If we are on the second port and cannot
+                                * combine this request with the last, then we
+                                * are done.
+                                */
+                               if (port != engine->execlist_port) {
+                                       __list_del_many(&p->requests,
+                                                       &rq->priotree.link);
+                                       goto done;
+                               }
+
+                               /*
+                                * If GVT overrides us we only ever submit
+                                * port[0], leaving port[1] empty. Note that we
+                                * also have to be careful that we don't queue
+                                * the same context (even though a different
+                                * request) to the second port.
+                                */
+                               if (ctx_single_port_submission(last->ctx) ||
+                                   ctx_single_port_submission(rq->ctx)) {
+                                       __list_del_many(&p->requests,
+                                                       &rq->priotree.link);
+                                       goto done;
+                               }
+
+                               GEM_BUG_ON(last->ctx == rq->ctx);
+
+                               if (submit)
+                                       port_assign(port, last);
+                               port++;
+                       }
 
-                       GEM_BUG_ON(last->ctx == cursor->ctx);
+                       INIT_LIST_HEAD(&rq->priotree.link);
+                       rq->priotree.priority = INT_MAX;
 
-                       if (submit)
-                               port_assign(port, last);
-                       port++;
+                       __i915_gem_request_submit(rq);
+                       trace_i915_gem_request_in(rq, port_index(port, engine));
+                       last = rq;
+                       submit = true;
                }
 
                rb = rb_next(rb);
-               rb_erase(&cursor->priotree.node, &engine->execlist_queue);
-               RB_CLEAR_NODE(&cursor->priotree.node);
-               cursor->priotree.priority = INT_MAX;
-
-               __i915_gem_request_submit(cursor);
-               trace_i915_gem_request_in(cursor, port_index(port, engine));
-               last = cursor;
-               submit = true;
+               rb_erase(&p->node, &engine->execlist_queue);
+               INIT_LIST_HEAD(&p->requests);
+               if (p->priority != I915_PRIORITY_NORMAL)
+                       kfree(p);
        }
-       if (submit) {
+done:
+       engine->execlist_first = rb;
+       if (submit)
                port_assign(port, last);
-               engine->execlist_first = rb;
-       }
        spin_unlock_irq(&engine->timeline->lock);
 
        if (submit)
@@ -610,28 +628,66 @@ static void intel_lrc_irq_handler(unsigned long data)
        intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+static bool
+insert_request(struct intel_engine_cs *engine,
+              struct i915_priotree *pt,
+              int prio)
 {
-       struct rb_node **p, *rb;
+       struct i915_priolist *p;
+       struct rb_node **parent, *rb;
        bool first = true;
 
+       if (unlikely(engine->no_priolist))
+               prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
        /* most positive priority is scheduled first, equal priorities fifo */
        rb = NULL;
-       p = &root->rb_node;
-       while (*p) {
-               struct i915_priotree *pos;
-
-               rb = *p;
-               pos = rb_entry(rb, typeof(*pos), node);
-               if (pt->priority > pos->priority) {
-                       p = &rb->rb_left;
-               } else {
-                       p = &rb->rb_right;
+       parent = &engine->execlist_queue.rb_node;
+       while (*parent) {
+               rb = *parent;
+               p = rb_entry(rb, typeof(*p), node);
+               if (prio > p->priority) {
+                       parent = &rb->rb_left;
+               } else if (prio < p->priority) {
+                       parent = &rb->rb_right;
                        first = false;
+               } else {
+                       list_add_tail(&pt->link, &p->requests);
+                       return false;
+               }
+       }
+
+       if (prio == I915_PRIORITY_NORMAL) {
+               p = &engine->default_priolist;
+       } else {
+               p = kmalloc(sizeof(*p), GFP_ATOMIC);
+               /* Convert an allocation failure to a priority bump */
+               if (unlikely(!p)) {
+                       prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+                       /* To maintain ordering with all rendering, after an
+                        * allocation failure we have to disable all scheduling.
+                        * Requests will then be executed in fifo, and schedule
+                        * will ensure that dependencies are emitted in fifo.
+                        * There will be still some reordering with existing
+                        * requests, so if userspace lied about their
+                        * dependencies that reordering may be visible.
+                        */
+                       engine->no_priolist = true;
+                       goto find_priolist;
                }
        }
-       rb_link_node(&pt->node, rb, p);
-       rb_insert_color(&pt->node, root);
+
+       p->priority = prio;
+       rb_link_node(&p->node, rb, parent);
+       rb_insert_color(&p->node, &engine->execlist_queue);
+
+       INIT_LIST_HEAD(&p->requests);
+       list_add_tail(&pt->link, &p->requests);
+
+       if (first)
+               engine->execlist_first = &p->node;
 
        return first;
 }
@@ -644,12 +700,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
        /* Will be called from irq-context when using foreign fences. */
        spin_lock_irqsave(&engine->timeline->lock, flags);
 
-       if (insert_request(&request->priotree, &engine->execlist_queue)) {
-               engine->execlist_first = &request->priotree.node;
+       if (insert_request(engine,
+                          &request->priotree,
+                          request->priotree.priority)) {
                if (execlists_elsp_ready(engine))
                        tasklet_hi_schedule(&engine->irq_tasklet);
        }
 
+       GEM_BUG_ON(!engine->execlist_first);
+       GEM_BUG_ON(list_empty(&request->priotree.link));
+
        spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
@@ -734,10 +794,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
                        continue;
 
                pt->priority = prio;
-               if (!RB_EMPTY_NODE(&pt->node)) {
-                       rb_erase(&pt->node, &engine->execlist_queue);
-                       if (insert_request(pt, &engine->execlist_queue))
-                               engine->execlist_first = &pt->node;
+               if (!list_empty(&pt->link)) {
+                       __list_del_entry(&pt->link);
+                       insert_request(engine, pt, prio);
                }
        }
 
index 162f0a9c6abe72bb43ae7cc2aaf41bdf85355005..6aa20ac8cde388613248f06a627213215a794948 100644 (file)
@@ -177,6 +177,12 @@ enum intel_engine_id {
        VECS
 };
 
+struct i915_priolist {
+       struct rb_node node;
+       struct list_head requests;
+       int priority;
+};
+
 #define INTEL_ENGINE_CS_MAX_NAME 8
 
 struct intel_engine_cs {
@@ -367,6 +373,8 @@ struct intel_engine_cs {
 
        /* Execlists */
        struct tasklet_struct irq_tasklet;
+       struct i915_priolist default_priolist;
+       bool no_priolist;
        struct execlist_port {
                struct drm_i915_gem_request *request_count;
 #define EXECLIST_COUNT_BITS 2
@@ -723,6 +731,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
+void intel_engines_mark_idle(struct drm_i915_private *i915);
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 
 #endif /* _INTEL_RINGBUFFER_H_ */