git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge tag 'drm-intel-next-2017-05-29' of git://anongit.freedesktop.org/git/drm-intel...
author    Dave Airlie <airlied@redhat.com>
          Tue, 30 May 2017 05:25:28 +0000 (15:25 +1000)
committer Dave Airlie <airlied@redhat.com>
          Tue, 30 May 2017 05:25:28 +0000 (15:25 +1000)
More stuff for 4.13:

- skl+ wm fixes from Mahesh Kumar
- some refactor and tests for i915_sw_fence (Chris)
- tune execlist/scheduler code (Chris)
- g4x,g33 gpu reset improvements (Chris, Mika)
- guc code cleanup (Michal Wajdeczko, Michał Winiarski)
- dp aux backlight improvements (Puthikorn Voravootivat)
- buffer based guc/host communication (Michal Wajdeczko)

* tag 'drm-intel-next-2017-05-29' of git://anongit.freedesktop.org/git/drm-intel: (253 commits)
  drm/i915: Update DRIVER_DATE to 20170529
  drm/i915: Keep the forcewake timer alive for 1ms past the most recent use
  drm/i915/guc: capture GuC logs if FW fails to load
  drm/i915/guc: Introduce buffer based cmd transport
  drm/i915/guc: Disable send function on fini
  drm: Add definition for eDP backlight frequency
  drm/i915: Drop AUX backlight enable check for backlight control
  drm/i915: Consolidate #ifdef CONFIG_INTEL_IOMMU
  drm/i915: Only GGTT vma may be pinned and prevent shrinking
  drm/i915: Serialize GTT/Aperture accesses on BXT
  drm/i915: Convert i915_gem_object_ops->flags values to use BIT()
  drm/i915/selftests: Silence compiler warning in igt_ctx_exec
  drm/i915/guc: Skip port assign on first iteration of GuC dequeue
  drm/i915: Remove misleading comment in request_alloc
  drm/i915/g33: Improve reset reliability
  Revert "drm/i915: Restore lost "Initialized i915" welcome message"
  drm/i915/huc: Update GLK HuC version
  drm/i915: Check for allocation failure
  drm/i915/guc: Remove action status and statistics from debugfs
  drm/i915/g4x: Improve gpu reset reliability
  ...

98 files changed:
drivers/gpu/drm/i915/Kconfig.debug
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/dvo_ch7017.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem.h
drivers/gpu/drm/i915/i915_gem_clflush.c
drivers/gpu/drm/i915/i915_gem_clflush.h
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_gem_dmabuf.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_gem_object.h
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_gem_request.h
drivers/gpu/drm/i915/i915_gem_shrinker.c
drivers/gpu/drm/i915/i915_gem_stolen.c
drivers/gpu/drm/i915/i915_gem_timeline.c
drivers/gpu/drm/i915/i915_gem_timeline.h
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_guc_submission.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_sw_fence.c
drivers/gpu/drm/i915/i915_sw_fence.h
drivers/gpu/drm/i915/i915_syncmap.c [new file with mode: 0644]
drivers/gpu/drm/i915/i915_syncmap.h [new file with mode: 0644]
drivers/gpu/drm/i915/i915_sysfs.c
drivers/gpu/drm/i915/i915_trace.h
drivers/gpu/drm/i915/i915_utils.h
drivers/gpu/drm/i915/intel_atomic_plane.c
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_cdclk.c
drivers/gpu/drm/i915/intel_crt.c
drivers/gpu/drm/i915/intel_device_info.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_dp_aux_backlight.c
drivers/gpu/drm/i915/intel_dp_link_training.c
drivers/gpu/drm/i915/intel_dp_mst.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_dsi.c
drivers/gpu/drm/i915/intel_dsi_vbt.c
drivers/gpu/drm/i915/intel_dvo.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_fbc.c
drivers/gpu/drm/i915/intel_guc_ct.c [new file with mode: 0644]
drivers/gpu/drm/i915/intel_guc_ct.h [new file with mode: 0644]
drivers/gpu/drm/i915/intel_guc_fwif.h
drivers/gpu/drm/i915/intel_guc_loader.c
drivers/gpu/drm/i915/intel_guc_log.c
drivers/gpu/drm/i915/intel_hangcheck.c
drivers/gpu/drm/i915/intel_hdmi.c
drivers/gpu/drm/i915/intel_huc.c
drivers/gpu/drm/i915/intel_lpe_audio.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_lrc.h
drivers/gpu/drm/i915/intel_panel.c
drivers/gpu/drm/i915/intel_pipe_crc.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/intel_sdvo.c
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/i915/intel_tv.c
drivers/gpu/drm/i915/intel_uc.c
drivers/gpu/drm/i915/intel_uc.h
drivers/gpu/drm/i915/intel_uncore.c
drivers/gpu/drm/i915/intel_uncore.h [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
drivers/gpu/drm/i915/selftests/i915_gem_context.c
drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
drivers/gpu/drm/i915/selftests/i915_gem_object.c
drivers/gpu/drm/i915/selftests/i915_gem_request.c
drivers/gpu/drm/i915/selftests/i915_gem_timeline.c [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
drivers/gpu/drm/i915/selftests/i915_random.c
drivers/gpu/drm/i915/selftests/i915_random.h
drivers/gpu/drm/i915/selftests/i915_sw_fence.c [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/i915_syncmap.c [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/mock_engine.c
drivers/gpu/drm/i915/selftests/mock_gem_device.c
drivers/gpu/drm/i915/selftests/mock_timeline.c [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/mock_timeline.h [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/mock_uncore.c [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/mock_uncore.h [new file with mode: 0644]
include/drm/drm_dp_helper.h
include/drm/intel_lpe_audio.h
include/uapi/drm/i915_drm.h
sound/x86/intel_hdmi_audio.c
sound/x86/intel_hdmi_audio.h

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index b00edd3b8800d2d327f7f5f14c537c61e63922bf..78c5c049a347bcfe416b59b168f632be121e4b50 100644
@@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
 
           If in doubt, say "N".
 
+config DRM_I915_SW_FENCE_CHECK_DAG
+        bool "Enable additional driver debugging for detecting dependency cycles"
+        depends on DRM_I915
+        default n
+        help
+          Choose this option to turn on extra driver debugging that may affect
+          performance but will catch some internal issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
 config DRM_I915_SELFTEST
        bool "Enable selftests upon driver load"
        depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2cf04504e494bd63e5623b2807bd623315485f1a..16dccf550412dcaccf69929d35160defc0a08725 100644
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
          i915_params.o \
          i915_pci.o \
           i915_suspend.o \
+         i915_syncmap.o \
          i915_sw_fence.o \
          i915_sysfs.o \
          intel_csr.o \
@@ -57,6 +58,7 @@ i915-y += i915_cmd_parser.o \
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
+         intel_guc_ct.o \
          intel_guc_log.o \
          intel_guc_loader.o \
          intel_huc.o \
diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c
index b3c7c199200cd39cce7a5d1877374c94d33f63c2..80b3e16cf48c0a0819fb4a9514cb21e5e6820772 100644
@@ -280,10 +280,10 @@ static void ch7017_mode_set(struct intel_dvo_device *dvo,
                        (0 << CH7017_PHASE_DETECTOR_SHIFT);
        } else {
                outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_HIGH;
-               lvds_pll_feedback_div = CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED |
+               lvds_pll_feedback_div =
+                       CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED |
                        (2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) |
                        (3 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT);
-               lvds_pll_feedback_div = 35;
                lvds_control_2 = (3 << CH7017_LOOP_FILTER_SHIFT) |
                        (0 << CH7017_PHASE_DETECTOR_SHIFT);
                if (1) { /* XXX: dual channel panel detection.  Assume yes for now. */
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index bada32b332378af50e5384d9d8a700b751f61b90..6ae286cb5804aee4342b30dfeda64159d54acf7d 100644
@@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
        gvt_dbg_sched("ring id %d workload lrca %x", ring_id,
                        workload->ctx_desc.lrca);
 
-       context_page_num = intel_lr_context_size(
-                       gvt->dev_priv->engine[ring_id]);
+       context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
 
        context_page_num = context_page_num >> PAGE_SHIFT;
 
@@ -181,6 +180,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
        struct intel_engine_cs *engine = dev_priv->engine[ring_id];
        struct drm_i915_gem_request *rq;
        struct intel_vgpu *vgpu = workload->vgpu;
+       struct intel_ring *ring;
        int ret;
 
        gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -199,8 +199,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
         * shadow_ctx pages invalid. So gvt need to pin itself. After update
         * the guest context, gvt can unpin the shadow_ctx safely.
         */
-       ret = engine->context_pin(engine, shadow_ctx);
-       if (ret) {
+       ring = engine->context_pin(engine, shadow_ctx);
+       if (IS_ERR(ring)) {
+               ret = PTR_ERR(ring);
                gvt_vgpu_err("fail to pin shadow context\n");
                workload->status = ret;
                mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -330,8 +331,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
        gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
                        workload->ctx_desc.lrca);
 
-       context_page_num = intel_lr_context_size(
-                       gvt->dev_priv->engine[ring_id]);
+       context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
 
        context_page_num = context_page_num >> PAGE_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7af100f844101c6abbf80273c49eb6227a94f8ff..f0cb22cc0dd6e52d715604550f14a049ac12e5b7 100644
@@ -1166,8 +1166,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
                                find_reg(engine, is_master, reg_addr);
 
                        if (!reg) {
-                               DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
-                                                reg_addr, *cmd, engine->exec_id);
+                               DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
+                                                reg_addr, *cmd, engine->name);
                                return false;
                        }
 
@@ -1222,11 +1222,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
                                desc->bits[i].mask;
 
                        if (dword != desc->bits[i].expected) {
-                               DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n",
+                               DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
                                                 *cmd,
                                                 desc->bits[i].mask,
                                                 desc->bits[i].expected,
-                                                dword, engine->exec_id);
+                                                dword, engine->name);
                                return false;
                        }
                }
@@ -1284,7 +1284,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 
                if (*cmd == MI_BATCH_BUFFER_END) {
                        if (needs_clflush_after) {
-                               void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping);
+                               void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
                                drm_clflush_virt_range(ptr,
                                                       (void *)(cmd + 1) - ptr);
                        }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1c66108f433380aca1e1ef8aeb34ea6b57ccfe6b..7e0816ccdc217debbd597cc213d2c62129a152d8 100644
@@ -2482,8 +2482,6 @@ static void i915_guc_client_info(struct seq_file *m,
                client->wq_size, client->wq_offset, client->wq_tail);
 
        seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
-       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
-       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
 
        for_each_engine(engine, dev_priv, id) {
                u64 submissions = client->submissions[id];
@@ -2494,42 +2492,34 @@ static void i915_guc_client_info(struct seq_file *m,
        seq_printf(m, "\tTotal: %llu\n", tot);
 }
 
-static int i915_guc_info(struct seq_file *m, void *data)
+static bool check_guc_submission(struct seq_file *m)
 {
        struct drm_i915_private *dev_priv = node_to_i915(m->private);
        const struct intel_guc *guc = &dev_priv->guc;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       u64 total;
 
        if (!guc->execbuf_client) {
                seq_printf(m, "GuC submission %s\n",
                           HAS_GUC_SCHED(dev_priv) ?
                           "disabled" :
                           "not supported");
-               return 0;
+               return false;
        }
 
+       return true;
+}
+
+static int i915_guc_info(struct seq_file *m, void *data)
+{
+       struct drm_i915_private *dev_priv = node_to_i915(m->private);
+       const struct intel_guc *guc = &dev_priv->guc;
+
+       if (!check_guc_submission(m))
+               return 0;
+
        seq_printf(m, "Doorbell map:\n");
        seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
        seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
 
-       seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
-       seq_printf(m, "GuC action failure count: %u\n", guc->action_fail);
-       seq_printf(m, "GuC last action command: 0x%x\n", guc->action_cmd);
-       seq_printf(m, "GuC last action status: 0x%x\n", guc->action_status);
-       seq_printf(m, "GuC last action error code: %d\n", guc->action_err);
-
-       total = 0;
-       seq_printf(m, "\nGuC submissions:\n");
-       for_each_engine(engine, dev_priv, id) {
-               u64 submissions = guc->submissions[id];
-               total += submissions;
-               seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-                       engine->name, submissions, guc->last_seqno[id]);
-       }
-       seq_printf(m, "\t%s: %llu\n", "Total", total);
-
        seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client);
        i915_guc_client_info(m, dev_priv, guc->execbuf_client);
 
@@ -2540,36 +2530,99 @@ static int i915_guc_info(struct seq_file *m, void *data)
        return 0;
 }
 
-static int i915_guc_log_dump(struct seq_file *m, void *data)
+static int i915_guc_stage_pool(struct seq_file *m, void *data)
 {
        struct drm_i915_private *dev_priv = node_to_i915(m->private);
-       struct drm_i915_gem_object *obj;
-       int i = 0, pg;
+       const struct intel_guc *guc = &dev_priv->guc;
+       struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr;
+       struct i915_guc_client *client = guc->execbuf_client;
+       unsigned int tmp;
+       int index;
 
-       if (!dev_priv->guc.log.vma)
+       if (!check_guc_submission(m))
                return 0;
 
-       obj = dev_priv->guc.log.vma->obj;
-       for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
-               u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
+       for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) {
+               struct intel_engine_cs *engine;
+
+               if (!(desc->attribute & GUC_STAGE_DESC_ATTR_ACTIVE))
+                       continue;
+
+               seq_printf(m, "GuC stage descriptor %u:\n", index);
+               seq_printf(m, "\tIndex: %u\n", desc->stage_id);
+               seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
+               seq_printf(m, "\tPriority: %d\n", desc->priority);
+               seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
+               seq_printf(m, "\tEngines used: 0x%x\n",
+                          desc->engines_used);
+               seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
+                          desc->db_trigger_phy,
+                          desc->db_trigger_cpu,
+                          desc->db_trigger_uk);
+               seq_printf(m, "\tProcess descriptor: 0x%x\n",
+                          desc->process_desc);
+               seq_printf(m, "\tWorkqueue address: 0x%x, size: 0x%x\n",
+                          desc->wq_addr, desc->wq_size);
+               seq_putc(m, '\n');
+
+               for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
+                       u32 guc_engine_id = engine->guc_id;
+                       struct guc_execlist_context *lrc =
+                                               &desc->lrc[guc_engine_id];
+
+                       seq_printf(m, "\t%s LRC:\n", engine->name);
+                       seq_printf(m, "\t\tContext desc: 0x%x\n",
+                                  lrc->context_desc);
+                       seq_printf(m, "\t\tContext id: 0x%x\n", lrc->context_id);
+                       seq_printf(m, "\t\tLRCA: 0x%x\n", lrc->ring_lrca);
+                       seq_printf(m, "\t\tRing begin: 0x%x\n", lrc->ring_begin);
+                       seq_printf(m, "\t\tRing end: 0x%x\n", lrc->ring_end);
+                       seq_putc(m, '\n');
+               }
+       }
+
+       return 0;
+}
+
+static int i915_guc_log_dump(struct seq_file *m, void *data)
+{
+       struct drm_info_node *node = m->private;
+       struct drm_i915_private *dev_priv = node_to_i915(node);
+       bool dump_load_err = !!node->info_ent->data;
+       struct drm_i915_gem_object *obj = NULL;
+       u32 *log;
+       int i = 0;
+
+       if (dump_load_err)
+               obj = dev_priv->guc.load_err_log;
+       else if (dev_priv->guc.log.vma)
+               obj = dev_priv->guc.log.vma->obj;
 
-               for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
-                       seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-                                  *(log + i), *(log + i + 1),
-                                  *(log + i + 2), *(log + i + 3));
+       if (!obj)
+               return 0;
 
-               kunmap_atomic(log);
+       log = i915_gem_object_pin_map(obj, I915_MAP_WC);
+       if (IS_ERR(log)) {
+               DRM_DEBUG("Failed to pin object\n");
+               seq_puts(m, "(log data unaccessible)\n");
+               return PTR_ERR(log);
        }
 
+       for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+               seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+                          *(log + i), *(log + i + 1),
+                          *(log + i + 2), *(log + i + 3));
+
        seq_putc(m, '\n');
 
+       i915_gem_object_unpin_map(obj);
+
        return 0;
 }
 
 static int i915_guc_log_control_get(void *data, u64 *val)
 {
-       struct drm_device *dev = data;
-       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct drm_i915_private *dev_priv = data;
 
        if (!dev_priv->guc.log.vma)
                return -EINVAL;
@@ -2581,14 +2634,13 @@ static int i915_guc_log_control_get(void *data, u64 *val)
 
 static int i915_guc_log_control_set(void *data, u64 val)
 {
-       struct drm_device *dev = data;
-       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct drm_i915_private *dev_priv = data;
        int ret;
 
        if (!dev_priv->guc.log.vma)
                return -EINVAL;
 
-       ret = mutex_lock_interruptible(&dev->struct_mutex);
+       ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
        if (ret)
                return ret;
 
@@ -2596,7 +2648,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
        ret = i915_guc_log_control(dev_priv, val);
        intel_runtime_pm_put(dev_priv);
 
-       mutex_unlock(&dev->struct_mutex);
+       mutex_unlock(&dev_priv->drm.struct_mutex);
        return ret;
 }
 
@@ -2855,7 +2907,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
        seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version),
                   CSR_VERSION_MINOR(csr->version));
 
-       if (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6)) {
+       if (IS_KABYLAKE(dev_priv) ||
+           (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6))) {
                seq_printf(m, "DC3 -> DC5 count: %d\n",
                           I915_READ(SKL_CSR_DC3_DC5_COUNT));
                seq_printf(m, "DC5 -> DC6 count: %d\n",
@@ -3043,36 +3096,6 @@ static void intel_connector_info(struct seq_file *m,
                intel_seq_print_mode(m, 2, mode);
 }
 
-static bool cursor_active(struct drm_i915_private *dev_priv, int pipe)
-{
-       u32 state;
-
-       if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-               state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
-       else
-               state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
-
-       return state;
-}
-
-static bool cursor_position(struct drm_i915_private *dev_priv,
-                           int pipe, int *x, int *y)
-{
-       u32 pos;
-
-       pos = I915_READ(CURPOS(pipe));
-
-       *x = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK;
-       if (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT))
-               *x = -*x;
-
-       *y = (pos >> CURSOR_Y_SHIFT) & CURSOR_POS_MASK;
-       if (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT))
-               *y = -*y;
-
-       return cursor_active(dev_priv, pipe);
-}
-
 static const char *plane_type(enum drm_plane_type type)
 {
        switch (type) {
@@ -3194,9 +3217,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
        seq_printf(m, "CRTC info\n");
        seq_printf(m, "---------\n");
        for_each_intel_crtc(dev, crtc) {
-               bool active;
                struct intel_crtc_state *pipe_config;
-               int x, y;
 
                drm_modeset_lock(&crtc->base.mutex, NULL);
                pipe_config = to_intel_crtc_state(crtc->base.state);
@@ -3208,14 +3229,18 @@ static int i915_display_info(struct seq_file *m, void *unused)
                           yesno(pipe_config->dither), pipe_config->pipe_bpp);
 
                if (pipe_config->base.active) {
+                       struct intel_plane *cursor =
+                               to_intel_plane(crtc->base.cursor);
+
                        intel_crtc_info(m, crtc);
 
-                       active = cursor_position(dev_priv, crtc->pipe, &x, &y);
-                       seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n",
-                                  yesno(crtc->cursor_base),
-                                  x, y, crtc->base.cursor->state->crtc_w,
-                                  crtc->base.cursor->state->crtc_h,
-                                  crtc->cursor_addr, yesno(active));
+                       seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n",
+                                  yesno(cursor->base.state->visible),
+                                  cursor->base.state->crtc_x,
+                                  cursor->base.state->crtc_y,
+                                  cursor->base.state->crtc_w,
+                                  cursor->base.state->crtc_h,
+                                  cursor->cursor.base);
                        intel_scaler_info(m, crtc);
                        intel_plane_info(m, crtc);
                }
@@ -3316,7 +3341,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
                if (i915.enable_execlists) {
                        u32 ptr, read, write;
-                       struct rb_node *rb;
+                       unsigned int idx;
 
                        seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
                                   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3334,8 +3359,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                        if (read > write)
                                write += GEN8_CSB_ENTRIES;
                        while (read < write) {
-                               unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+                               idx = ++read % GEN8_CSB_ENTRIES;
                                seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
                                           idx,
                                           I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3343,28 +3367,30 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                        }
 
                        rcu_read_lock();
-                       rq = READ_ONCE(engine->execlist_port[0].request);
-                       if (rq) {
-                               seq_printf(m, "\t\tELSP[0] count=%d, ",
-                                          engine->execlist_port[0].count);
-                               print_request(m, rq, "rq: ");
-                       } else {
-                               seq_printf(m, "\t\tELSP[0] idle\n");
-                       }
-                       rq = READ_ONCE(engine->execlist_port[1].request);
-                       if (rq) {
-                               seq_printf(m, "\t\tELSP[1] count=%d, ",
-                                          engine->execlist_port[1].count);
-                               print_request(m, rq, "rq: ");
-                       } else {
-                               seq_printf(m, "\t\tELSP[1] idle\n");
+                       for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+                               unsigned int count;
+
+                               rq = port_unpack(&engine->execlist_port[idx],
+                                                &count);
+                               if (rq) {
+                                       seq_printf(m, "\t\tELSP[%d] count=%d, ",
+                                                  idx, count);
+                                       print_request(m, rq, "rq: ");
+                               } else {
+                                       seq_printf(m, "\t\tELSP[%d] idle\n",
+                                                  idx);
+                               }
                        }
                        rcu_read_unlock();
 
                        spin_lock_irq(&engine->timeline->lock);
-                       for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
-                               rq = rb_entry(rb, typeof(*rq), priotree.node);
-                               print_request(m, rq, "\t\tQ ");
+                       for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
+                               struct i915_priolist *p =
+                                       rb_entry(rb, typeof(*p), node);
+
+                               list_for_each_entry(rq, &p->requests,
+                                                   priotree.link)
+                                       print_request(m, rq, "\t\tQ ");
                        }
                        spin_unlock_irq(&engine->timeline->lock);
                } else if (INTEL_GEN(dev_priv) > 6) {
@@ -3704,16 +3730,10 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
        if (len == 0)
                return 0;
 
-       input_buffer = kmalloc(len + 1, GFP_KERNEL);
-       if (!input_buffer)
-               return -ENOMEM;
+       input_buffer = memdup_user_nul(ubuf, len);
+       if (IS_ERR(input_buffer))
+               return PTR_ERR(input_buffer);
 
-       if (copy_from_user(input_buffer, ubuf, len)) {
-               status = -EFAULT;
-               goto out;
-       }
-
-       input_buffer[len] = '\0';
        DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len);
 
        drm_connector_list_iter_begin(dev, &conn_iter);
@@ -3739,7 +3759,6 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
                }
        }
        drm_connector_list_iter_end(&conn_iter);
-out:
        kfree(input_buffer);
        if (status < 0)
                return status;
@@ -3900,6 +3919,8 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
                num_levels = 3;
        else if (IS_VALLEYVIEW(dev_priv))
                num_levels = 1;
+       else if (IS_G4X(dev_priv))
+               num_levels = 3;
        else
                num_levels = ilk_wm_max_level(dev_priv) + 1;
 
@@ -3912,8 +3933,10 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
                 * - WM1+ latency values in 0.5us units
                 * - latencies are in us on gen9/vlv/chv
                 */
-               if (INTEL_GEN(dev_priv) >= 9 || IS_VALLEYVIEW(dev_priv) ||
-                   IS_CHERRYVIEW(dev_priv))
+               if (INTEL_GEN(dev_priv) >= 9 ||
+                   IS_VALLEYVIEW(dev_priv) ||
+                   IS_CHERRYVIEW(dev_priv) ||
+                   IS_G4X(dev_priv))
                        latency *= 10;
                else if (level > 0)
                        latency *= 5;
@@ -3974,7 +3997,7 @@ static int pri_wm_latency_open(struct inode *inode, struct file *file)
 {
        struct drm_i915_private *dev_priv = inode->i_private;
 
-       if (INTEL_GEN(dev_priv) < 5)
+       if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
                return -ENODEV;
 
        return single_open(file, pri_wm_latency_show, dev_priv);
@@ -4016,6 +4039,8 @@ static ssize_t wm_latency_write(struct file *file, const char __user *ubuf,
                num_levels = 3;
        else if (IS_VALLEYVIEW(dev_priv))
                num_levels = 1;
+       else if (IS_G4X(dev_priv))
+               num_levels = 3;
        else
                num_levels = ilk_wm_max_level(dev_priv) + 1;
 
@@ -4776,6 +4801,8 @@ static const struct drm_info_list i915_debugfs_list[] = {
        {"i915_guc_info", i915_guc_info, 0},
        {"i915_guc_load_status", i915_guc_load_status_info, 0},
        {"i915_guc_log_dump", i915_guc_log_dump, 0},
+       {"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1},
+       {"i915_guc_stage_pool", i915_guc_stage_pool, 0},
        {"i915_huc_load_status", i915_huc_load_status_info, 0},
        {"i915_frequency_info", i915_frequency_info, 0},
        {"i915_hangcheck_info", i915_hangcheck_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3036d4835b0fa7a3b366a31d0b6ed18fc7889ae1..7b8c72776f46ce82271e04361fd220e032f0314f 100644
@@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
        case I915_PARAM_HAS_EXEC_SOFTPIN:
        case I915_PARAM_HAS_EXEC_ASYNC:
        case I915_PARAM_HAS_EXEC_FENCE:
+       case I915_PARAM_HAS_EXEC_CAPTURE:
                /* For the time being all of these are always true;
                 * if some supported hardware does not have one of these
                 * features this value needs to be provided from
@@ -834,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
        intel_uc_init_early(dev_priv);
        i915_memcpy_init_early(dev_priv);
 
-       ret = intel_engines_init_early(dev_priv);
-       if (ret)
-               return ret;
-
        ret = i915_workqueues_init(dev_priv);
        if (ret < 0)
                goto err_engines;
@@ -855,7 +852,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
        intel_init_audio_hooks(dev_priv);
        ret = i915_gem_load_init(dev_priv);
        if (ret < 0)
-               goto err_workqueues;
+               goto err_irq;
 
        intel_display_crc_init(dev_priv);
 
@@ -867,7 +864,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 
        return 0;
 
-err_workqueues:
+err_irq:
+       intel_irq_fini(dev_priv);
        i915_workqueues_cleanup(dev_priv);
 err_engines:
        i915_engines_cleanup(dev_priv);
@@ -882,6 +880,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 {
        i915_perf_fini(dev_priv);
        i915_gem_load_cleanup(dev_priv);
+       intel_irq_fini(dev_priv);
        i915_workqueues_cleanup(dev_priv);
        i915_engines_cleanup(dev_priv);
 }
@@ -947,14 +946,21 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
 
        ret = i915_mmio_setup(dev_priv);
        if (ret < 0)
-               goto put_bridge;
+               goto err_bridge;
 
        intel_uncore_init(dev_priv);
+
+       ret = intel_engines_init_mmio(dev_priv);
+       if (ret)
+               goto err_uncore;
+
        i915_gem_init_mmio(dev_priv);
 
        return 0;
 
-put_bridge:
+err_uncore:
+       intel_uncore_fini(dev_priv);
+err_bridge:
        pci_dev_put(dev_priv->bridge_dev);
 
        return ret;
@@ -1213,9 +1219,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct drm_i915_private *dev_priv;
        int ret;
 
-       /* Enable nuclear pageflip on ILK+, except vlv/chv */
-       if (!i915.nuclear_pageflip &&
-           (match_info->gen < 5 || match_info->has_gmch_display))
+       /* Enable nuclear pageflip on ILK+ */
+       if (!i915.nuclear_pageflip && match_info->gen < 5)
                driver.driver_features &= ~DRIVER_ATOMIC;
 
        ret = -ENOMEM;
@@ -1272,10 +1277,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        dev_priv->ipc_enabled = false;
 
-       /* Everything is in place, we can now relax! */
-       DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
-                driver.name, driver.major, driver.minor, driver.patchlevel,
-                driver.date, pci_name(pdev), dev_priv->drm.primary->index);
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
                DRM_INFO("DRM_I915_DEBUG enabled\n");
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9b0949f6c1a2aba281c9a4bbf8d8b2c9ede3785..35e161b5b90e696c5f913072ab4755692e39f2f5 100644
@@ -55,6 +55,7 @@
 #include "i915_reg.h"
 #include "i915_utils.h"
 
+#include "intel_uncore.h"
 #include "intel_bios.h"
 #include "intel_dpll_mgr.h"
 #include "intel_uc.h"
@@ -79,8 +80,8 @@
 
 #define DRIVER_NAME            "i915"
 #define DRIVER_DESC            "Intel Graphics"
-#define DRIVER_DATE            "20170403"
-#define DRIVER_TIMESTAMP       1491198738
+#define DRIVER_DATE            "20170529"
+#define DRIVER_TIMESTAMP       1496041258
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -114,6 +115,13 @@ typedef struct {
        fp; \
 })
 
+static inline bool is_fixed16_zero(uint_fixed_16_16_t val)
+{
+       if (val.val == 0)
+               return true;
+       return false;
+}
+
 static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val)
 {
        uint_fixed_16_16_t fp;
@@ -152,8 +160,39 @@ static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1,
        return max;
 }
 
-static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val,
-                                                         uint32_t d)
+static inline uint32_t div_round_up_fixed16(uint_fixed_16_16_t val,
+                                           uint_fixed_16_16_t d)
+{
+       return DIV_ROUND_UP(val.val, d.val);
+}
+
+static inline uint32_t mul_round_up_u32_fixed16(uint32_t val,
+                                               uint_fixed_16_16_t mul)
+{
+       uint64_t intermediate_val;
+       uint32_t result;
+
+       intermediate_val = (uint64_t) val * mul.val;
+       intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16);
+       WARN_ON(intermediate_val >> 32);
+       result = clamp_t(uint32_t, intermediate_val, 0, ~0);
+       return result;
+}
+
+static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val,
+                                            uint_fixed_16_16_t mul)
+{
+       uint64_t intermediate_val;
+       uint_fixed_16_16_t fp;
+
+       intermediate_val = (uint64_t) val.val * mul.val;
+       intermediate_val = intermediate_val >> 16;
+       WARN_ON(intermediate_val >> 32);
+       fp.val = clamp_t(uint32_t, intermediate_val, 0, ~0);
+       return fp;
+}
+
+static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d)
 {
        uint_fixed_16_16_t fp, res;
 
@@ -162,8 +201,7 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val,
        return res;
 }
 
-static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val,
-                                                             uint32_t d)
+static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d)
 {
        uint_fixed_16_16_t res;
        uint64_t interm_val;
@@ -176,6 +214,17 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val,
        return res;
 }
 
+static inline uint32_t div_round_up_u32_fixed16(uint32_t val,
+                                               uint_fixed_16_16_t d)
+{
+       uint64_t interm_val;
+
+       interm_val = (uint64_t)val << 16;
+       interm_val = DIV_ROUND_UP_ULL(interm_val, d.val);
+       WARN_ON(interm_val >> 32);
+       return clamp_t(uint32_t, interm_val, 0, ~0);
+}
+
 static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val,
                                                     uint_fixed_16_16_t mul)
 {
@@ -676,116 +725,6 @@ struct drm_i915_display_funcs {
        void (*load_luts)(struct drm_crtc_state *crtc_state);
 };
 
-enum forcewake_domain_id {
-       FW_DOMAIN_ID_RENDER = 0,
-       FW_DOMAIN_ID_BLITTER,
-       FW_DOMAIN_ID_MEDIA,
-
-       FW_DOMAIN_ID_COUNT
-};
-
-enum forcewake_domains {
-       FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
-       FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
-       FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA),
-       FORCEWAKE_ALL = (FORCEWAKE_RENDER |
-                        FORCEWAKE_BLITTER |
-                        FORCEWAKE_MEDIA)
-};
-
-#define FW_REG_READ  (1)
-#define FW_REG_WRITE (2)
-
-enum decoupled_power_domain {
-       GEN9_DECOUPLED_PD_BLITTER = 0,
-       GEN9_DECOUPLED_PD_RENDER,
-       GEN9_DECOUPLED_PD_MEDIA,
-       GEN9_DECOUPLED_PD_ALL
-};
-
-enum decoupled_ops {
-       GEN9_DECOUPLED_OP_WRITE = 0,
-       GEN9_DECOUPLED_OP_READ
-};
-
-enum forcewake_domains
-intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
-                              i915_reg_t reg, unsigned int op);
-
-struct intel_uncore_funcs {
-       void (*force_wake_get)(struct drm_i915_private *dev_priv,
-                              enum forcewake_domains domains);
-       void (*force_wake_put)(struct drm_i915_private *dev_priv,
-                              enum forcewake_domains domains);
-
-       uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv,
-                              i915_reg_t r, bool trace);
-       uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
-                              i915_reg_t r, bool trace);
-       uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
-                              i915_reg_t r, bool trace);
-       uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
-                              i915_reg_t r, bool trace);
-
-       void (*mmio_writeb)(struct drm_i915_private *dev_priv,
-                           i915_reg_t r, uint8_t val, bool trace);
-       void (*mmio_writew)(struct drm_i915_private *dev_priv,
-                           i915_reg_t r, uint16_t val, bool trace);
-       void (*mmio_writel)(struct drm_i915_private *dev_priv,
-                           i915_reg_t r, uint32_t val, bool trace);
-};
-
-struct intel_forcewake_range {
-       u32 start;
-       u32 end;
-
-       enum forcewake_domains domains;
-};
-
-struct intel_uncore {
-       spinlock_t lock; /** lock is also taken in irq contexts. */
-
-       const struct intel_forcewake_range *fw_domains_table;
-       unsigned int fw_domains_table_entries;
-
-       struct notifier_block pmic_bus_access_nb;
-       struct intel_uncore_funcs funcs;
-
-       unsigned fifo_count;
-
-       enum forcewake_domains fw_domains;
-       enum forcewake_domains fw_domains_active;
-
-       u32 fw_set;
-       u32 fw_clear;
-       u32 fw_reset;
-
-       struct intel_uncore_forcewake_domain {
-               enum forcewake_domain_id id;
-               enum forcewake_domains mask;
-               unsigned wake_count;
-               struct hrtimer timer;
-               i915_reg_t reg_set;
-               i915_reg_t reg_ack;
-       } fw_domain[FW_DOMAIN_ID_COUNT];
-
-       int unclaimed_mmio_check;
-};
-
-#define __mask_next_bit(mask) ({                                       \
-       int __idx = ffs(mask) - 1;                                      \
-       mask &= ~BIT(__idx);                                            \
-       __idx;                                                          \
-})
-
-/* Iterate over initialised fw domains */
-#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
-       for (tmp__ = (mask__); \
-            tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
-
-#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
-       for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)
-
 #define CSR_VERSION(major, minor)      ((major) << 16 | (minor))
 #define CSR_VERSION_MAJOR(version)     ((version) >> 16)
 #define CSR_VERSION_MINOR(version)     ((version) & 0xffff)
@@ -821,8 +760,8 @@ struct intel_csr {
        func(has_gmbus_irq); \
        func(has_gmch_display); \
        func(has_guc); \
+       func(has_guc_ct); \
        func(has_hotplug); \
-       func(has_hw_contexts); \
        func(has_l3_dpf); \
        func(has_llc); \
        func(has_logical_ring_contexts); \
@@ -1025,6 +964,9 @@ struct i915_gpu_state {
                        u32 *pages[0];
                } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
+               struct drm_i915_error_object **user_bo;
+               long user_bo_count;
+
                struct drm_i915_error_object *wa_ctx;
 
                struct drm_i915_error_request {
@@ -1511,11 +1453,7 @@ struct i915_gem_mm {
        /** LRU list of objects with fence regs on them. */
        struct list_head fence_list;
 
-       /**
-        * Are we in a non-interruptible section of code like
-        * modesetting?
-        */
-       bool interruptible;
+       u64 unordered_timeline;
 
        /* the indicator for dispatch video commands on two BSD rings */
        atomic_t bsd_engine_dispatch_index;
@@ -1566,7 +1504,7 @@ struct i915_gpu_error {
         *
         * This is a counter which gets incremented when reset is triggered,
         *
-        * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+        * Before the reset commences, the I915_RESET_BACKOFF bit is set
         * meaning that any waiters holding onto the struct_mutex should
         * relinquish the lock immediately in order for the reset to start.
         *
@@ -1763,13 +1701,15 @@ struct ilk_wm_values {
        enum intel_ddb_partitioning partitioning;
 };
 
-struct vlv_pipe_wm {
+struct g4x_pipe_wm {
        uint16_t plane[I915_MAX_PLANES];
+       uint16_t fbc;
 };
 
-struct vlv_sr_wm {
+struct g4x_sr_wm {
        uint16_t plane;
        uint16_t cursor;
+       uint16_t fbc;
 };
 
 struct vlv_wm_ddl_values {
@@ -1777,13 +1717,22 @@ struct vlv_wm_ddl_values {
 };
 
 struct vlv_wm_values {
-       struct vlv_pipe_wm pipe[3];
-       struct vlv_sr_wm sr;
+       struct g4x_pipe_wm pipe[3];
+       struct g4x_sr_wm sr;
        struct vlv_wm_ddl_values ddl[3];
        uint8_t level;
        bool cxsr;
 };
 
+struct g4x_wm_values {
+       struct g4x_pipe_wm pipe[2];
+       struct g4x_sr_wm sr;
+       struct g4x_sr_wm hpll;
+       bool cxsr;
+       bool hpll_en;
+       bool fbc_en;
+};
+
 struct skl_ddb_entry {
        uint16_t start, end;    /* in number of blocks, 'end' is exclusive */
 };
@@ -2100,7 +2049,7 @@ struct i915_oa_ops {
                    size_t *offset);
 
        /**
-        * @oa_buffer_is_empty: Check if OA buffer empty (false positives OK)
+        * @oa_buffer_check: Check for OA buffer data + update tail
         *
         * This is either called via fops or the poll check hrtimer (atomic
         * ctx) without any locks taken.
@@ -2113,7 +2062,7 @@ struct i915_oa_ops {
         * here, which will be handled gracefully - likely resulting in an
         * %EAGAIN error for userspace.
         */
-       bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
+       bool (*oa_buffer_check)(struct drm_i915_private *dev_priv);
 };
 
 struct intel_cdclk_state {
@@ -2127,6 +2076,7 @@ struct drm_i915_private {
        struct kmem_cache *vmas;
        struct kmem_cache *requests;
        struct kmem_cache *dependencies;
+       struct kmem_cache *priorities;
 
        const struct intel_device_info info;
 
@@ -2362,7 +2312,6 @@ struct drm_i915_private {
         */
        struct mutex av_mutex;
 
-       uint32_t hw_context_size;
        struct list_head context_list;
 
        u32 fdi_rx_config;
@@ -2413,6 +2362,7 @@ struct drm_i915_private {
                        struct ilk_wm_values hw;
                        struct skl_wm_values skl_hw;
                        struct vlv_wm_values vlv;
+                       struct g4x_wm_values g4x;
                };
 
                uint8_t max_level;
@@ -2454,11 +2404,14 @@ struct drm_i915_private {
                        wait_queue_head_t poll_wq;
                        bool pollin;
 
+                       /**
+                        * For rate limiting any notifications of spurious
+                        * invalid OA reports
+                        */
+                       struct ratelimit_state spurious_report_rs;
+
                        bool periodic;
                        int period_exponent;
-                       int timestamp_frequency;
-
-                       int tail_margin;
 
                        int metrics_set;
 
@@ -2472,6 +2425,70 @@ struct drm_i915_private {
                                u8 *vaddr;
                                int format;
                                int format_size;
+
+                               /**
+                                * Locks reads and writes to all head/tail state
+                                *
+                                * Consider: the head and tail pointer state
+                                * needs to be read consistently from a hrtimer
+                                * callback (atomic context) and read() fop
+                                * (user context) with tail pointer updates
+                                * happening in atomic context and head updates
+                                * in user context and the (unlikely)
+                                * possibility of read() errors needing to
+                                * reset all head/tail state.
+                                *
+                                * Note: Contention or performance aren't
+                                * currently a significant concern here
+                                * considering the relatively low frequency of
+                                * hrtimer callbacks (5ms period) and that
+                                * reads typically only happen in response to a
+                                * hrtimer event and likely complete before the
+                                * next callback.
+                                *
+                                * Note: This lock is not held *while* reading
+                                * and copying data to userspace so the value
+                                * of head observed in hrtimer callbacks won't
+                                * represent any partial consumption of data.
+                                */
+                               spinlock_t ptr_lock;
+
+                               /**
+                                * One 'aging' tail pointer and one 'aged'
+                                * tail pointer ready to used for reading.
+                                *
+                                * Initial values of 0xffffffff are invalid
+                                * and imply that an update is required
+                                * (and should be ignored by an attempted
+                                * read)
+                                */
+                               struct {
+                                       u32 offset;
+                               } tails[2];
+
+                               /**
+                                * Index for the aged tail ready to read()
+                                * data up to.
+                                */
+                               unsigned int aged_tail_idx;
+
+                               /**
+                                * A monotonic timestamp for when the current
+                                * aging tail pointer was read; used to
+                                * determine when it is old enough to trust.
+                                */
+                               u64 aging_timestamp;
+
+                               /**
+                                * Although we can always read back the head
+                                * pointer register, we prefer to avoid
+                                * trusting the HW state, just to avoid any
+                                * risk that some hardware condition could
+                                * somehow bump the head pointer unpredictably
+                                * and cause us to forward the wrong OA buffer
+                                * data to userspace.
+                                */
+                               u32 head;
                        } oa_buffer;
 
                        u32 gen7_latched_oastatus1;
@@ -2870,7 +2887,6 @@ intel_info(const struct drm_i915_private *dev_priv)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)   ((dev_priv)->info.hws_needs_physical)
 
-#define HAS_HW_CONTEXTS(dev_priv)          ((dev_priv)->info.has_hw_contexts)
 #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \
                ((dev_priv)->info.has_logical_ring_contexts)
 #define USES_PPGTT(dev_priv)           (i915.enable_ppgtt)
@@ -2909,6 +2925,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_FW_BLC(dev_priv)   (INTEL_GEN(dev_priv) > 2)
 #define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr)
 #define HAS_FBC(dev_priv)      ((dev_priv)->info.has_fbc)
+#define HAS_CUR_FBC(dev_priv)  (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7)
 
 #define HAS_IPS(dev_priv)      (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))
 
@@ -2931,6 +2948,7 @@ intel_info(const struct drm_i915_private *dev_priv)
  * properties, so we have separate macros to test them.
  */
 #define HAS_GUC(dev_priv)      ((dev_priv)->info.has_guc)
+#define HAS_GUC_CT(dev_priv)   ((dev_priv)->info.has_guc_ct)
 #define HAS_GUC_UCODE(dev_priv)        (HAS_GUC(dev_priv))
 #define HAS_GUC_SCHED(dev_priv)        (HAS_GUC(dev_priv))
 #define HAS_HUC_UCODE(dev_priv)        (HAS_GUC(dev_priv))
@@ -2981,15 +2999,26 @@ intel_info(const struct drm_i915_private *dev_priv)
 
 #include "i915_trace.h"
 
-static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
+static inline bool intel_vtd_active(void)
 {
 #ifdef CONFIG_INTEL_IOMMU
-       if (INTEL_GEN(dev_priv) >= 6 && intel_iommu_gfx_mapped)
+       if (intel_iommu_gfx_mapped)
                return true;
 #endif
        return false;
 }
 
+static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
+{
+       return INTEL_GEN(dev_priv) >= 6 && intel_vtd_active();
+}
+
+static inline bool
+intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
+{
+       return IS_BROXTON(dev_priv) && intel_vtd_active();
+}
+
 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
                                int enable_ppgtt);
 
@@ -3026,7 +3055,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
-int intel_engines_init_early(struct drm_i915_private *dev_priv);
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);
 
 /* intel_hotplug.c */
@@ -3063,43 +3092,10 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
                       const char *fmt, ...);
 
 extern void intel_irq_init(struct drm_i915_private *dev_priv);
+extern void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
 void intel_irq_uninstall(struct drm_i915_private *dev_priv);
 
-extern void intel_uncore_sanitize(struct drm_i915_private *dev_priv);
-extern void intel_uncore_init(struct drm_i915_private *dev_priv);
-extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv);
-extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv);
-extern void intel_uncore_fini(struct drm_i915_private *dev_priv);
-extern void intel_uncore_suspend(struct drm_i915_private *dev_priv);
-extern void intel_uncore_resume_early(struct drm_i915_private *dev_priv);
-const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id);
-void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
-                               enum forcewake_domains domains);
-void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
-                               enum forcewake_domains domains);
-/* Like above but the caller must manage the uncore.lock itself.
- * Must be used with I915_READ_FW and friends.
- */
-void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
-                                       enum forcewake_domains domains);
-void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
-                                       enum forcewake_domains domains);
-u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
-
-void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
-
-int intel_wait_for_register(struct drm_i915_private *dev_priv,
-                           i915_reg_t reg,
-                           const u32 mask,
-                           const u32 value,
-                           const unsigned long timeout_ms);
-int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
-                              i915_reg_t reg,
-                              const u32 mask,
-                              const u32 value,
-                              const unsigned long timeout_ms);
-
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
 {
        return dev_priv->gvt;
@@ -3447,8 +3443,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
 
 int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
-                                 bool write);
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
 int __must_check
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
 struct i915_vma * __must_check
@@ -3711,8 +3708,8 @@ int  intel_lpe_audio_init(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
-                           void *eld, int port, int pipe, int tmds_clk_speed,
-                           bool dp_output, int link_rate);
+                           enum pipe pipe, enum port port,
+                           const void *eld, int ls_clock, bool dp_output);
 
 /* intel_i2c.c */
 extern int intel_setup_gmbus(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0e07f35e270ce8690abc3d16631443d30c3560f5..7ab47a84671ff2eb39770ca708d23a0de91ff003 100644
@@ -46,8 +46,6 @@
 #include <linux/dma-buf.h>
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
@@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
                               args->size, &args->handle);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+       return (domain == I915_GEM_DOMAIN_GTT ?
+               obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+
+       if (!(obj->base.write_domain & flush_domains))
+               return;
+
+       /* No actual flushing is required for the GTT write domain.  Writes
+        * to it "immediately" go to main memory as far as we know, so there's
+        * no chipset flush.  It also doesn't land in render cache.
+        *
+        * However, we do have to enforce the order so that all writes through
+        * the GTT land before any writes to the device, such as updates to
+        * the GATT itself.
+        *
+        * We also have to wait a bit for the writes to land from the GTT.
+        * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+        * timing. This issue has only been observed when switching quickly
+        * between GTT writes and CPU reads from inside the kernel on recent hw,
+        * and it appears to only affect discrete GTT blocks (i.e. on LLC
+        * system agents we cannot reproduce this behaviour).
+        */
+       wmb();
+
+       switch (obj->base.write_domain) {
+       case I915_GEM_DOMAIN_GTT:
+               if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+                       if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+                               spin_lock_irq(&dev_priv->uncore.lock);
+                               POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+                               spin_unlock_irq(&dev_priv->uncore.lock);
+                               intel_runtime_pm_put(dev_priv);
+                       }
+               }
+
+               intel_fb_obj_flush(obj,
+                                  fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+               break;
+
+       case I915_GEM_DOMAIN_CPU:
+               i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+               break;
+       }
+
+       obj->base.write_domain = 0;
+}
+
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
                        const char *gpu_vaddr, int gpu_offset,
@@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
                        goto out;
        }
 
-       i915_gem_object_flush_gtt_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
@@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                        goto out;
        }
 
-       i915_gem_object_flush_gtt_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
@@ -1501,13 +1554,6 @@ err:
        return ret;
 }
 
-static inline enum fb_op_origin
-write_origin(struct drm_i915_gem_object *obj, unsigned domain)
-{
-       return (domain == I915_GEM_DOMAIN_GTT ?
-               obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
        struct drm_i915_private *i915;
@@ -1591,10 +1637,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
        if (err)
                goto out_unpin;
 
-       if (read_domains & I915_GEM_DOMAIN_GTT)
-               err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+       if (read_domains & I915_GEM_DOMAIN_WC)
+               err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+       else if (read_domains & I915_GEM_DOMAIN_GTT)
+               err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
-               err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+               err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
 
        /* And bump the LRU for this access */
        i915_gem_object_bump_inactive_ggtt(obj);
@@ -1602,7 +1650,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
        mutex_unlock(&dev->struct_mutex);
 
        if (write_domain != 0)
-               intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+               intel_fb_obj_invalidate(obj,
+                                       fb_write_origin(obj, write_domain));
 
 out_unpin:
        i915_gem_object_unpin_pages(obj);
@@ -1737,6 +1786,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  *     into userspace. (This view is aligned and sized appropriately for
  *     fenced access.)
  *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
  * Restrictions:
  *
  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -1764,7 +1816,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  */
 int i915_gem_mmap_gtt_version(void)
 {
-       return 1;
+       return 2;
 }
 
 static inline struct i915_ggtt_view
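
The gtt_version bump above advertises the new behaviour to userspace (via the mmap-GTT version getparam): before poking at a WC mapping, a client can move the object into the new WC domain so any delayed GTT writes are flushed first. A minimal userspace sketch, assuming an open i915 DRM fd and an existing GEM handle; the fallback define assumes the 0x80 value this series adds to the uapi header:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    #ifndef I915_GEM_DOMAIN_WC
    #define I915_GEM_DOMAIN_WC 0x80         /* new uapi domain bit */
    #endif

    /* Flush pending GTT writes and mark the object for WC access. */
    static int set_wc_domain(int fd, uint32_t handle, int write)
    {
            struct drm_i915_gem_set_domain arg = {
                    .handle = handle,
                    .read_domains = I915_GEM_DOMAIN_WC,
                    .write_domain = write ? I915_GEM_DOMAIN_WC : 0,
            };

            return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
    }
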
@@ -2228,7 +2280,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
        if (obj->mm.mapping) {
                void *ptr;
 
-               ptr = ptr_mask_bits(obj->mm.mapping);
+               ptr = page_mask_bits(obj->mm.mapping);
                if (is_vmalloc_addr(ptr))
                        vunmap(ptr);
                else
@@ -2560,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
        }
        GEM_BUG_ON(!obj->mm.pages);
 
-       ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
+       ptr = page_unpack_bits(obj->mm.mapping, &has_type);
        if (ptr && has_type != type) {
                if (pinned) {
                        ret = -EBUSY;
@@ -2582,7 +2634,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
                        goto err_unpin;
                }
 
-               obj->mm.mapping = ptr_pack_bits(ptr, type);
+               obj->mm.mapping = page_pack_bits(ptr, type);
        }
 
 out_unlock:
@@ -2967,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
         */
 
        if (i915.enable_execlists) {
+               struct execlist_port *port = engine->execlist_port;
                unsigned long flags;
+               unsigned int n;
 
                spin_lock_irqsave(&engine->timeline->lock, flags);
 
-               i915_gem_request_put(engine->execlist_port[0].request);
-               i915_gem_request_put(engine->execlist_port[1].request);
+               for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+                       i915_gem_request_put(port_request(&port[n]));
                memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
                engine->execlist_queue = RB_ROOT;
                engine->execlist_first = NULL;
@@ -3101,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
        struct drm_i915_private *dev_priv =
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
        struct drm_device *dev = &dev_priv->drm;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
        bool rearm_hangcheck;
 
        if (!READ_ONCE(dev_priv->gt.awake))
@@ -3140,10 +3192,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
        if (wait_for(intel_engines_are_idle(dev_priv), 10))
                DRM_ERROR("Timeout waiting for engines to idle\n");
 
-       for_each_engine(engine, dev_priv, id) {
-               intel_engine_disarm_breadcrumbs(engine);
-               i915_gem_batch_pool_fini(&engine->batch_pool);
-       }
+       intel_engines_mark_idle(dev_priv);
+       i915_gem_timelines_mark_idle(dev_priv);
 
        GEM_BUG_ON(!dev_priv->gt.awake);
        dev_priv->gt.awake = false;
@@ -3320,56 +3370,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
        return ret;
 }
 
-/** Flushes the GTT write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-       if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
-               return;
-
-       /* No actual flushing is required for the GTT write domain.  Writes
-        * to it "immediately" go to main memory as far as we know, so there's
-        * no chipset flush.  It also doesn't land in render cache.
-        *
-        * However, we do have to enforce the order so that all writes through
-        * the GTT land before any writes to the device, such as updates to
-        * the GATT itself.
-        *
-        * We also have to wait a bit for the writes to land from the GTT.
-        * An uncached read (i.e. mmio) seems to be ideal for the round-trip
-        * timing. This issue has only been observed when switching quickly
-        * between GTT writes and CPU reads from inside the kernel on recent hw,
-        * and it appears to only affect discrete GTT blocks (i.e. on LLC
-        * system agents we cannot reproduce this behaviour).
-        */
-       wmb();
-       if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-               if (intel_runtime_pm_get_if_in_use(dev_priv)) {
-                       spin_lock_irq(&dev_priv->uncore.lock);
-                       POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-                       spin_unlock_irq(&dev_priv->uncore.lock);
-                       intel_runtime_pm_put(dev_priv);
-               }
-       }
-
-       intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
-
-       obj->base.write_domain = 0;
-}
-
-/** Flushes the CPU write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
-{
-       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
-               return;
-
-       i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-       obj->base.write_domain = 0;
-}
-
 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 {
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
@@ -3389,6 +3389,69 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
        mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  (write ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
+       if (ret)
+               return ret;
+
+       if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
+               return 0;
+
+       /* Flush and acquire obj->pages so that we are coherent through
+        * direct access in memory with previous cached writes through
+        * shmemfs and that our cache domain tracking remains valid.
+        * For example, if the obj->filp was moved to swap without us
+        * being notified and releasing the pages, we would mistakenly
+        * continue to assume that the obj remained out of the CPU cached
+        * domain.
+        */
+       ret = i915_gem_object_pin_pages(obj);
+       if (ret)
+               return ret;
+
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+       /* Serialise direct access to this object with the barriers for
+        * coherent writes from the GPU, by effectively invalidating the
+        * WC domain upon first access.
+        */
+       if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
+               mb();
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+       obj->base.read_domains |= I915_GEM_DOMAIN_WC;
+       if (write) {
+               obj->base.read_domains = I915_GEM_DOMAIN_WC;
+               obj->base.write_domain = I915_GEM_DOMAIN_WC;
+               obj->mm.dirty = true;
+       }
+
+       i915_gem_object_unpin_pages(obj);
+       return 0;
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3428,7 +3491,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       i915_gem_object_flush_cpu_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
@@ -3802,7 +3865,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
                return 0;
 
-       i915_gem_object_flush_gtt_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -3996,7 +4059,7 @@ __busy_set_if_active(const struct dma_fence *fence,
        if (i915_gem_request_completed(rq))
                return 0;
 
-       return flag(rq->engine->exec_id);
+       return flag(rq->engine->uabi_id);
 }
 
 static __always_inline unsigned int
@@ -4195,7 +4258,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
         * catch if we ever need to fix it. In the meantime, if you do spot
         * such a local variable, please consider fixing!
         */
-       if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
+       if (size >> PAGE_SHIFT > INT_MAX)
                return ERR_PTR(-E2BIG);
 
        if (overflows_type(size, obj->base.size))
@@ -4302,6 +4365,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
        intel_runtime_pm_put(i915);
        mutex_unlock(&i915->drm.struct_mutex);
 
+       cond_resched();
+
        llist_for_each_entry_safe(obj, on, freed, freed) {
                GEM_BUG_ON(obj->bind_count);
                GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
@@ -4349,8 +4414,11 @@ static void __i915_gem_free_work(struct work_struct *work)
         * unbound now.
         */
 
-       while ((freed = llist_del_all(&i915->mm.free_list)))
+       while ((freed = llist_del_all(&i915->mm.free_list))) {
                __i915_gem_free_objects(i915, freed);
+               if (need_resched())
+                       break;
+       }
 }
 
 static void __i915_gem_free_object_rcu(struct rcu_head *head)
@@ -4415,10 +4483,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
         * try to take over. The only way to remove the earlier state
         * is by resetting. However, resetting on earlier gen is tricky as
         * it may impact the display and we are uncertain about the stability
-        * of the reset, so we only reset recent machines with logical
-        * context support (that must be reset to remove any stray contexts).
+        * of the reset, so this could be applied to even earlier gen.
         */
-       if (HAS_HW_CONTEXTS(i915)) {
+       if (INTEL_GEN(i915) >= 5) {
                int reset = intel_gpu_reset(i915, ALL_ENGINES);
                WARN_ON(reset && reset != -ENODEV);
        }
@@ -4661,11 +4728,9 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
        if (value >= 0)
                return value;
 
-#ifdef CONFIG_INTEL_IOMMU
        /* Enable semaphores on SNB when IO remapping is off */
-       if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
+       if (IS_GEN6(dev_priv) && intel_vtd_active())
                return false;
-#endif
 
        return true;
 }
@@ -4676,7 +4741,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
        mutex_lock(&dev_priv->drm.struct_mutex);
 
-       i915_gem_clflush_init(dev_priv);
+       dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
 
        if (!i915.enable_execlists) {
                dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4799,12 +4864,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
        if (!dev_priv->dependencies)
                goto err_requests;
 
+       dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+       if (!dev_priv->priorities)
+               goto err_dependencies;
+
        mutex_lock(&dev_priv->drm.struct_mutex);
        INIT_LIST_HEAD(&dev_priv->gt.timelines);
        err = i915_gem_timeline_init__global(dev_priv);
        mutex_unlock(&dev_priv->drm.struct_mutex);
        if (err)
-               goto err_dependencies;
+               goto err_priorities;
 
        INIT_LIST_HEAD(&dev_priv->context_list);
        INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4822,14 +4891,14 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 
        init_waitqueue_head(&dev_priv->pending_flip_queue);
 
-       dev_priv->mm.interruptible = true;
-
        atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
        spin_lock_init(&dev_priv->fb_tracking.lock);
 
        return 0;
 
+err_priorities:
+       kmem_cache_destroy(dev_priv->priorities);
 err_dependencies:
        kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
@@ -4853,6 +4922,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
        WARN_ON(!list_empty(&dev_priv->gt.timelines));
        mutex_unlock(&dev_priv->drm.struct_mutex);
 
+       kmem_cache_destroy(dev_priv->priorities);
        kmem_cache_destroy(dev_priv->dependencies);
        kmem_cache_destroy(dev_priv->requests);
        kmem_cache_destroy(dev_priv->vmas);
@@ -4864,9 +4934,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 
 int i915_gem_freeze(struct drm_i915_private *dev_priv)
 {
-       mutex_lock(&dev_priv->drm.struct_mutex);
+       /* Discard all purgeable objects, let userspace recover those as
+        * required after resuming.
+        */
        i915_gem_shrink_all(dev_priv);
-       mutex_unlock(&dev_priv->drm.struct_mutex);
 
        return 0;
 }
@@ -4891,12 +4962,13 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
         * we update that state just before writing out the image.
         *
         * To try and reduce the hibernation image, we manually shrink
-        * the objects as well.
+        * the objects as well, see i915_gem_freeze()
         */
 
-       mutex_lock(&dev_priv->drm.struct_mutex);
        i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
+       i915_gem_drain_freed_objects(dev_priv);
 
+       mutex_lock(&dev_priv->drm.struct_mutex);
        for (p = phases; *p; p++) {
                list_for_each_entry(obj, *p, global_link) {
                        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
index 5a49487368ca39826017207effb5db4fb4c7f514..ee54597465b60edce09d8e47e8118d7ca3b2f355 100644 (file)
@@ -25,6 +25,8 @@
 #ifndef __I915_GEM_H__
 #define __I915_GEM_H__
 
+#include <linux/bug.h>
+
 #ifdef CONFIG_DRM_I915_DEBUG_GEM
 #define GEM_BUG_ON(expr) BUG_ON(expr)
 #define GEM_WARN_ON(expr) WARN_ON(expr)
index ffd01e02fe94b536e36f58672b5ccfce8a6e9a9f..ffac7a1f0caf34d71588c62d88fe83f964087be6 100644 (file)
@@ -27,7 +27,6 @@
 #include "i915_gem_clflush.h"
 
 static DEFINE_SPINLOCK(clflush_lock);
-static u64 clflush_context;
 
 struct clflush {
        struct dma_fence dma; /* Must be first for dma_fence_free() */
@@ -157,7 +156,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
                dma_fence_init(&clflush->dma,
                               &i915_clflush_ops,
                               &clflush_lock,
-                              clflush_context,
+                              to_i915(obj->base.dev)->mm.unordered_timeline,
                               0);
                i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
 
@@ -182,8 +181,3 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
                GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
        }
 }
-
-void i915_gem_clflush_init(struct drm_i915_private *i915)
-{
-       clflush_context = dma_fence_context_alloc(1);
-}
index b62d61a2d15fb60cb4073d4ee7d19bbdd2cd523c..2455a7820937b0f461839de713693a9010950997 100644 (file)
@@ -28,7 +28,6 @@
 struct drm_i915_private;
 struct drm_i915_gem_object;
 
-void i915_gem_clflush_init(struct drm_i915_private *i915);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
                             unsigned int flags);
 #define I915_CLFLUSH_FORCE BIT(0)
index 8bd0c4966913f55078b3069bca6c4edb4bd4e48b..c5d1666d7071271335b6c614b05ed75b5412651c 100644 (file)
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-static int get_context_size(struct drm_i915_private *dev_priv)
-{
-       int ret;
-       u32 reg;
-
-       switch (INTEL_GEN(dev_priv)) {
-       case 6:
-               reg = I915_READ(CXT_SIZE);
-               ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
-               break;
-       case 7:
-               reg = I915_READ(GEN7_CXT_SIZE);
-               if (IS_HASWELL(dev_priv))
-                       ret = HSW_CXT_TOTAL_SIZE;
-               else
-                       ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
-               break;
-       case 8:
-               ret = GEN8_CXT_TOTAL_SIZE;
-               break;
-       default:
-               BUG();
-       }
-
-       return ret;
-}
-
 void i915_gem_context_free(struct kref *ctx_ref)
 {
        struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -151,45 +124,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
        kfree(ctx);
 }
 
-static struct drm_i915_gem_object *
-alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
-{
-       struct drm_i915_gem_object *obj;
-       int ret;
-
-       lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-       obj = i915_gem_object_create(dev_priv, size);
-       if (IS_ERR(obj))
-               return obj;
-
-       /*
-        * Try to make the context utilize L3 as well as LLC.
-        *
-        * On VLV we don't have L3 controls in the PTEs so we
-        * shouldn't touch the cache level, especially as that
-        * would make the object snooped which might have a
-        * negative performance impact.
-        *
-        * Snooping is required on non-llc platforms in execlist
-        * mode, but since all GGTT accesses use PAT entry 0 we
-        * get snooping anyway regardless of cache_level.
-        *
-        * This is only applicable for Ivy Bridge devices since
-        * later platforms don't have L3 control bits in the PTE.
-        */
-       if (IS_IVYBRIDGE(dev_priv)) {
-               ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
-               /* Failure shouldn't ever happen this early */
-               if (WARN_ON(ret)) {
-                       i915_gem_object_put(obj);
-                       return ERR_PTR(ret);
-               }
-       }
-
-       return obj;
-}
-
 static void context_close(struct i915_gem_context *ctx)
 {
        i915_gem_context_set_closed(ctx);
@@ -265,26 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
        kref_init(&ctx->ref);
        list_add_tail(&ctx->link, &dev_priv->context_list);
        ctx->i915 = dev_priv;
-
-       if (dev_priv->hw_context_size) {
-               struct drm_i915_gem_object *obj;
-               struct i915_vma *vma;
-
-               obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
-               if (IS_ERR(obj)) {
-                       ret = PTR_ERR(obj);
-                       goto err_out;
-               }
-
-               vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
-               if (IS_ERR(vma)) {
-                       i915_gem_object_put(obj);
-                       ret = PTR_ERR(vma);
-                       goto err_out;
-               }
-
-               ctx->engine[RCS].state = vma;
-       }
+       ctx->priority = I915_PRIORITY_NORMAL;
 
        /* Default context will never have a file_priv */
        ret = DEFAULT_CONTEXT_HANDLE;
@@ -443,21 +358,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
        ida_init(&dev_priv->context_hw_ida);
 
-       if (i915.enable_execlists) {
-               /* NB: intentionally left blank. We will allocate our own
-                * backing objects as we need them, thank you very much */
-               dev_priv->hw_context_size = 0;
-       } else if (HAS_HW_CONTEXTS(dev_priv)) {
-               dev_priv->hw_context_size =
-                       round_up(get_context_size(dev_priv),
-                                I915_GTT_PAGE_SIZE);
-               if (dev_priv->hw_context_size > (1<<20)) {
-                       DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
-                                        dev_priv->hw_context_size);
-                       dev_priv->hw_context_size = 0;
-               }
-       }
-
        ctx = i915_gem_create_context(dev_priv, NULL);
        if (IS_ERR(ctx)) {
                DRM_ERROR("Failed to create default global context (error %ld)\n",
@@ -477,8 +377,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
        GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
 
        DRM_DEBUG_DRIVER("%s context support initialized\n",
-                       i915.enable_execlists ? "LR" :
-                       dev_priv->hw_context_size ? "HW" : "fake");
+                        dev_priv->engine[RCS]->context_size ? "logical" :
+                        "fake");
        return 0;
 }
 
@@ -941,11 +841,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
        return 0;
 }
 
-static bool contexts_enabled(struct drm_device *dev)
-{
-       return i915.enable_execlists || to_i915(dev)->hw_context_size;
-}
-
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
        return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
@@ -954,12 +849,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv)
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
                                  struct drm_file *file)
 {
+       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_context_create *args = data;
        struct drm_i915_file_private *file_priv = file->driver_priv;
        struct i915_gem_context *ctx;
        int ret;
 
-       if (!contexts_enabled(dev))
+       if (!dev_priv->engine[RCS]->context_size)
                return -ENODEV;
 
        if (args->pad != 0)
@@ -977,7 +873,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
        if (ret)
                return ret;
 
-       ctx = i915_gem_create_context(to_i915(dev), file_priv);
+       ctx = i915_gem_create_context(dev_priv, file_priv);
        mutex_unlock(&dev->struct_mutex);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
index f225bf680b6de190ce1cbb4aa0e67a5b3d5f603a..6176e589cf09f9b287cf25700a27a170255d6fe8 100644 (file)
@@ -122,12 +122,36 @@ static void i915_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long
 }
 static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
 {
+       struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+       struct page *page;
+
+       if (page_num >= obj->base.size >> PAGE_SHIFT)
+               return NULL;
+
+       if (!i915_gem_object_has_struct_page(obj))
+               return NULL;
+
+       if (i915_gem_object_pin_pages(obj))
+               return NULL;
+
+       /* Synchronisation is left to the caller (via .begin_cpu_access()) */
+       page = i915_gem_object_get_page(obj, page_num);
+       if (IS_ERR(page))
+               goto err_unpin;
+
+       return kmap(page);
+
+err_unpin:
+       i915_gem_object_unpin_pages(obj);
        return NULL;
 }
 
 static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
 {
+       struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 
+       kunmap(virt_to_page(addr));
+       i915_gem_object_unpin_pages(obj);
 }
 
 static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
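
For reference, the kmap/kunmap hooks filled in above are reached through the generic dma-buf interface; an importer on the other side would use them roughly as below, with coherency bracketed by begin/end_cpu_access as the comment in the hook notes. A sketch only, assuming a dma_buf already obtained from i915:

    #include <linux/dma-buf.h>
    #include <linux/dma-direction.h>

    /* Map and inspect the first page of an imported buffer. */
    static int peek_first_page(struct dma_buf *dmabuf)
    {
            void *vaddr;
            int err;

            err = dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
            if (err)
                    return err;

            vaddr = dma_buf_kmap(dmabuf, 0);        /* page 0; NULL if unsupported */
            if (vaddr) {
                    /* ... read the page contents ... */
                    dma_buf_kunmap(dmabuf, 0, vaddr);
            }

            return dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);
    }
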
index 4ee2dc38b7c94af5086778c904235636e1de067e..04211c970b9f23d9eb7bd5d16d236f831c5f6a3b 100644 (file)
@@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
        list_for_each_entry(vma, vmas, exec_list) {
                struct drm_i915_gem_object *obj = vma->obj;
 
+               if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
+                       struct i915_gem_capture_list *capture;
+
+                       capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+                       if (unlikely(!capture))
+                               return -ENOMEM;
+
+                       capture->next = req->capture_list;
+                       capture->vma = vma;
+                       req->capture_list = capture;
+               }
+
                if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
                        continue;
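
The capture_list built just above is driven from userspace: setting EXEC_OBJECT_CAPTURE on an exec object asks the kernel to snapshot that buffer into the GPU error state if the request hangs. A minimal sketch of the submitting side, assuming an execbuf object array that is already being assembled:

    #include <stdint.h>
    #include <string.h>
    #include <drm/i915_drm.h>

    /* Fill one execbuf slot and request capture of its contents on a hang. */
    static void fill_exec_slot(struct drm_i915_gem_exec_object2 *slot,
                               uint32_t handle)
    {
            memset(slot, 0, sizeof(*slot));
            slot->handle = handle;
            slot->flags = EXEC_OBJECT_CAPTURE;
    }
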
 
index 7e3193aa7da11105a25b8ae00f45f1efea1dc8b3..0c1008a2bbda88f5b723eeb01cb2eafe748effa6 100644 (file)
@@ -168,13 +168,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
        if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
                return 3;
 
-#ifdef CONFIG_INTEL_IOMMU
        /* Disable ppgtt on SNB if VT-d is on. */
-       if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
+       if (IS_GEN6(dev_priv) && intel_vtd_active()) {
                DRM_INFO("Disabling PPGTT because VT-d is on\n");
                return 0;
        }
-#endif
 
        /* Early VLV doesn't have this */
        if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
@@ -195,9 +193,12 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
        u32 pte_flags;
        int ret;
 
-       ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size);
-       if (ret)
-               return ret;
+       if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
+               ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
+                                                vma->size);
+               if (ret)
+                       return ret;
+       }
 
        vma->pages = vma->obj->mm.pages;
 
@@ -1989,14 +1990,10 @@ void i915_ppgtt_release(struct kref *kref)
  */
 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
 {
-#ifdef CONFIG_INTEL_IOMMU
        /* Query intel_iommu to see if we need the workaround. Presumably that
         * was loaded first.
         */
-       if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped)
-               return true;
-#endif
-       return false;
+       return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
 }
 
 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
@@ -2188,6 +2185,101 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
                gen8_set_pte(&gtt_base[i], scratch_pte);
 }
 
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+       struct drm_i915_private *dev_priv = vm->i915;
+
+       /*
+        * Make sure the internal GAM fifo has been cleared of all GTT
+        * writes before exiting stop_machine(). This guarantees that
+        * any aperture accesses waiting to start in another process
+        * cannot back up behind the GTT writes causing a hang.
+        * The register can be any arbitrary GAM register.
+        */
+       POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+       struct i915_address_space *vm;
+       dma_addr_t addr;
+       u64 offset;
+       enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+       struct insert_page *arg = _arg;
+
+       gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+       bxt_vtd_ggtt_wa(arg->vm);
+
+       return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+                                         dma_addr_t addr,
+                                         u64 offset,
+                                         enum i915_cache_level level,
+                                         u32 unused)
+{
+       struct insert_page arg = { vm, addr, offset, level };
+
+       stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+       struct i915_address_space *vm;
+       struct sg_table *st;
+       u64 start;
+       enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+       struct insert_entries *arg = _arg;
+
+       gen8_ggtt_insert_entries(arg->vm, arg->st, arg->start, arg->level, 0);
+       bxt_vtd_ggtt_wa(arg->vm);
+
+       return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+                                            struct sg_table *st,
+                                            u64 start,
+                                            enum i915_cache_level level,
+                                            u32 unused)
+{
+       struct insert_entries arg = { vm, st, start, level };
+
+       stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+       struct i915_address_space *vm;
+       u64 start;
+       u64 length;
+};
+
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+       struct clear_range *arg = _arg;
+
+       gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+       bxt_vtd_ggtt_wa(arg->vm);
+
+       return 0;
+}
+
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+                                         u64 start,
+                                         u64 length)
+{
+       struct clear_range arg = { vm, start, length };
+
+       stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
+}
+
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
                                  u64 start, u64 length)
 {
@@ -2306,10 +2398,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
        if (flags & I915_VMA_LOCAL_BIND) {
                struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 
-               if (appgtt->base.allocate_va_range) {
+               if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
+                   appgtt->base.allocate_va_range) {
                        ret = appgtt->base.allocate_va_range(&appgtt->base,
                                                             vma->node.start,
-                                                            vma->node.size);
+                                                            vma->size);
                        if (ret)
                                goto err_pages;
                }
@@ -2579,14 +2672,14 @@ static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
 {
        snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
        snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
-       return snb_gmch_ctl << 25; /* 32 MB units */
+       return (size_t)snb_gmch_ctl << 25; /* 32 MB units */
 }
 
 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
 {
        bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
        bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
-       return bdw_gmch_ctl << 25; /* 32 MB units */
+       return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */
 }
 
 static size_t chv_get_stolen_size(u16 gmch_ctrl)
@@ -2600,11 +2693,11 @@ static size_t chv_get_stolen_size(u16 gmch_ctrl)
         * 0x17 to 0x1d: 4MB increments start at 36MB
         */
        if (gmch_ctrl < 0x11)
-               return gmch_ctrl << 25;
+               return (size_t)gmch_ctrl << 25;
        else if (gmch_ctrl < 0x17)
-               return (gmch_ctrl - 0x11 + 2) << 22;
+               return (size_t)(gmch_ctrl - 0x11 + 2) << 22;
        else
-               return (gmch_ctrl - 0x17 + 9) << 22;
+               return (size_t)(gmch_ctrl - 0x17 + 9) << 22;
 }
 
 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
@@ -2613,10 +2706,10 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
        gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
 
        if (gen9_gmch_ctl < 0xf0)
-               return gen9_gmch_ctl << 25; /* 32 MB units */
+               return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */
        else
                /* 4MB increments starting at 0xf0 for 4MB */
-               return (gen9_gmch_ctl - 0xf0 + 1) << 22;
+               return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22;
 }
 
 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
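
The (size_t) casts in the stolen-size helpers above guard against integer promotion: the u16 control value is promoted to a 32-bit int before the shift, so any gen8/gen9 value of 0x40 or larger overflows the 32 MiB-unit computation (0xef, for instance, means 239 * 32 MiB, roughly 7.5 GiB). A one-line illustration with a hypothetical helper name, assuming kernel types:

    /* Widen before shifting so large stolen sizes survive the arithmetic. */
    static size_t gen9_stolen_bytes(u16 gmch_ctl)
    {
            return (size_t)gmch_ctl << 25;  /* 32 MiB units */
    }
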
@@ -2743,13 +2836,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
        struct pci_dev *pdev = dev_priv->drm.pdev;
        unsigned int size;
        u16 snb_gmch_ctl;
+       int err;
 
        /* TODO: We're not aware of mappable constraints on gen8 yet */
        ggtt->mappable_base = pci_resource_start(pdev, 2);
        ggtt->mappable_end = pci_resource_len(pdev, 2);
 
-       if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39)))
-               pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
+       if (!err)
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+       if (err)
+               DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
 
        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
 
@@ -2781,6 +2878,14 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
        ggtt->base.insert_entries = gen8_ggtt_insert_entries;
 
+       /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+       if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
+               ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+               ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+               if (ggtt->base.clear_range != nop_clear_range)
+                       ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+       }
+
        ggtt->invalidate = gen6_ggtt_invalidate;
 
        return ggtt_probe_common(ggtt, size);
@@ -2792,6 +2897,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
        struct pci_dev *pdev = dev_priv->drm.pdev;
        unsigned int size;
        u16 snb_gmch_ctl;
+       int err;
 
        ggtt->mappable_base = pci_resource_start(pdev, 2);
        ggtt->mappable_end = pci_resource_len(pdev, 2);
@@ -2804,8 +2910,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
                return -ENXIO;
        }
 
-       if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
-               pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
+       if (!err)
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+       if (err)
+               DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
 
        ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
@@ -2924,10 +3033,8 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
                 ggtt->base.total >> 20);
        DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
        DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20);
-#ifdef CONFIG_INTEL_IOMMU
-       if (intel_iommu_gfx_mapped)
+       if (intel_vtd_active())
                DRM_INFO("VT-d active for gfx access\n");
-#endif
 
        return 0;
 }
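
The repeated #ifdef CONFIG_INTEL_IOMMU blocks in this file (and in the i915_gem.c hunks earlier) collapse into calls to the new intel_vtd_active() helper, whose definition lives outside the hunks shown here. A sketch of its presumed shape, based on the open-coded checks it replaces; the intel_ggtt_update_needs_vtd_wa() test used in gen8_gmch_probe() presumably combines a Broxton platform check with the same condition:

    static inline bool intel_vtd_active(void)
    {
    #ifdef CONFIG_INTEL_IOMMU
            if (intel_iommu_gfx_mapped)
                    return true;
    #endif
            return false;
    }
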
index 174cf923c23633e6e5ec84645bfa22b573e80575..35e1a27729dc8b59aad40d8da6c8a6440436c217 100644 (file)
@@ -37,8 +37,8 @@
 
 struct drm_i915_gem_object_ops {
        unsigned int flags;
-#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
-#define I915_GEM_OBJECT_IS_SHRINKABLE   0x2
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE   BIT(1)
 
        /* Interface between the GEM object and its backing storage.
         * get_pages() is called once prior to the use of the associated set
index 5ddbc94997751adf5c9f04f7dd4a37a74d70de24..0d1e0d8873ef61f143609f8b19161cd921caa4d5 100644 (file)
@@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
        if (i915_fence_signaled(fence))
                return false;
 
-       intel_engine_enable_signaling(to_request(fence));
+       intel_engine_enable_signaling(to_request(fence), true);
        return true;
 }
 
@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
 {
        struct i915_dependency *dep, *next;
 
-       GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+       GEM_BUG_ON(!list_empty(&pt->link));
 
        /* Everyone we depended upon (the fences we wait to be signaled)
         * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
 {
        INIT_LIST_HEAD(&pt->signalers_list);
        INIT_LIST_HEAD(&pt->waiters_list);
-       RB_CLEAR_NODE(&pt->node);
+       INIT_LIST_HEAD(&pt->link);
        pt->priority = INT_MIN;
 }
 
@@ -214,12 +214,12 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
                }
 
                /* Finally reset hw state */
-               tl->seqno = seqno;
                intel_engine_init_global_seqno(engine, seqno);
+               tl->seqno = seqno;
 
                list_for_each_entry(timeline, &i915->gt.timelines, link)
-                       memset(timeline->engine[id].sync_seqno, 0,
-                              sizeof(timeline->engine[id].sync_seqno));
+                       memset(timeline->engine[id].global_sync, 0,
+                              sizeof(timeline->engine[id].global_sync));
        }
 
        return 0;
@@ -271,6 +271,48 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
        /* Space left intentionally blank */
 }
 
+static void advance_ring(struct drm_i915_gem_request *request)
+{
+       unsigned int tail;
+
+       /* We know the GPU must have read the request to have
+        * sent us the seqno + interrupt, so use the position
+        * of tail of the request to update the last known position
+        * of the GPU head.
+        *
+        * Note this requires that we are always called in request
+        * completion order.
+        */
+       if (list_is_last(&request->ring_link, &request->ring->request_list)) {
+               /* We may race here with execlists resubmitting this request
+                * as we retire it. The resubmission will move the ring->tail
+                * forwards (to request->wa_tail). We either read the
+                * current value that was written to hw, or the value that
+                * is just about to be. Either works, if we miss the last two
+                * noops - they are safe to be replayed on a reset.
+                */
+               tail = READ_ONCE(request->ring->tail);
+       } else {
+               tail = request->postfix;
+       }
+       list_del(&request->ring_link);
+
+       request->ring->head = tail;
+}
+
+static void free_capture_list(struct drm_i915_gem_request *request)
+{
+       struct i915_gem_capture_list *capture;
+
+       capture = request->capture_list;
+       while (capture) {
+               struct i915_gem_capture_list *next = capture->next;
+
+               kfree(capture);
+               capture = next;
+       }
+}
+
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
        struct intel_engine_cs *engine = request->engine;
@@ -287,16 +329,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
        list_del_init(&request->link);
        spin_unlock_irq(&engine->timeline->lock);
 
-       /* We know the GPU must have read the request to have
-        * sent us the seqno + interrupt, so use the position
-        * of tail of the request to update the last known position
-        * of the GPU head.
-        *
-        * Note this requires that we are always called in request
-        * completion order.
-        */
-       list_del(&request->ring_link);
-       request->ring->head = request->postfix;
        if (!--request->i915->gt.active_requests) {
                GEM_BUG_ON(!request->i915->gt.awake);
                mod_delayed_work(request->i915->wq,
@@ -304,6 +336,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
                                 msecs_to_jiffies(100));
        }
        unreserve_seqno(request->engine);
+       advance_ring(request);
+
+       free_capture_list(request);
 
        /* Walk through the active list, calling retire on each. This allows
         * objects to track their GPU activity and mark themselves as idle
@@ -402,7 +437,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
        spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
        request->global_seqno = seqno;
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-               intel_engine_enable_signaling(request);
+               intel_engine_enable_signaling(request, false);
        spin_unlock(&request->lock);
 
        engine->emit_breadcrumb(request,
@@ -503,9 +538,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
  *
  * @engine: engine that we wish to issue the request on.
  * @ctx: context that the request will be associated with.
- *       This can be NULL if the request is not directly related to
- *       any specific user context, in which case this function will
- *       choose an appropriate context to use.
  *
  * Returns a pointer to the allocated request if successful,
  * or an error code if not.
@@ -516,6 +548,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 {
        struct drm_i915_private *dev_priv = engine->i915;
        struct drm_i915_gem_request *req;
+       struct intel_ring *ring;
        int ret;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -530,9 +563,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         * GGTT space, so do this first before we reserve a seqno for
         * ourselves.
         */
-       ret = engine->context_pin(engine, ctx);
-       if (ret)
-               return ERR_PTR(ret);
+       ring = engine->context_pin(engine, ctx);
+       if (IS_ERR(ring))
+               return ERR_CAST(ring);
+       GEM_BUG_ON(!ring);
 
        ret = reserve_seqno(engine);
        if (ret)
@@ -598,11 +632,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
        req->i915 = dev_priv;
        req->engine = engine;
        req->ctx = ctx;
+       req->ring = ring;
 
        /* No zalloc, must clear what we need by hand */
        req->global_seqno = 0;
        req->file_priv = NULL;
        req->batch = NULL;
+       req->capture_list = NULL;
 
        /*
         * Reserve space in the ring buffer for all the commands required to
@@ -623,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         * GPU processing the request, we never over-estimate the
         * position of the head.
         */
-       req->head = req->ring->tail;
+       req->head = req->ring->emit;
 
        /* Check that we didn't interrupt ourselves with a new request */
        GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
@@ -651,6 +687,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
        int ret;
 
        GEM_BUG_ON(to == from);
+       GEM_BUG_ON(to->timeline == from->timeline);
 
        if (i915_gem_request_completed(from))
                return 0;
@@ -663,9 +700,6 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
                        return ret;
        }
 
-       if (to->timeline == from->timeline)
-               return 0;
-
        if (to->engine == from->engine) {
                ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
                                                       &from->submit,
@@ -674,55 +708,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
        }
 
        seqno = i915_gem_request_global_seqno(from);
-       if (!seqno) {
-               ret = i915_sw_fence_await_dma_fence(&to->submit,
-                                                   &from->fence, 0,
-                                                   GFP_KERNEL);
-               return ret < 0 ? ret : 0;
-       }
+       if (!seqno)
+               goto await_dma_fence;
 
-       if (seqno <= to->timeline->sync_seqno[from->engine->id])
-               return 0;
+       if (!to->engine->semaphore.sync_to) {
+               if (!__i915_gem_request_started(from, seqno))
+                       goto await_dma_fence;
 
-       trace_i915_gem_ring_sync_to(to, from);
-       if (!i915.semaphores) {
-               if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
-                       ret = i915_sw_fence_await_dma_fence(&to->submit,
-                                                           &from->fence, 0,
-                                                           GFP_KERNEL);
-                       if (ret < 0)
-                               return ret;
-               }
+               if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2))
+                       goto await_dma_fence;
        } else {
+               GEM_BUG_ON(!from->engine->semaphore.signal);
+
+               if (seqno <= to->timeline->global_sync[from->engine->id])
+                       return 0;
+
+               trace_i915_gem_ring_sync_to(to, from);
                ret = to->engine->semaphore.sync_to(to, from);
                if (ret)
                        return ret;
+
+               to->timeline->global_sync[from->engine->id] = seqno;
        }
 
-       to->timeline->sync_seqno[from->engine->id] = seqno;
        return 0;
+
+await_dma_fence:
+       ret = i915_sw_fence_await_dma_fence(&to->submit,
+                                           &from->fence, 0,
+                                           GFP_KERNEL);
+       return ret < 0 ? ret : 0;
 }
 
 int
 i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
                                 struct dma_fence *fence)
 {
-       struct dma_fence_array *array;
+       struct dma_fence **child = &fence;
+       unsigned int nchild = 1;
        int ret;
-       int i;
-
-       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-               return 0;
-
-       if (dma_fence_is_i915(fence))
-               return i915_gem_request_await_request(req, to_request(fence));
-
-       if (!dma_fence_is_array(fence)) {
-               ret = i915_sw_fence_await_dma_fence(&req->submit,
-                                                   fence, I915_FENCE_TIMEOUT,
-                                                   GFP_KERNEL);
-               return ret < 0 ? ret : 0;
-       }
 
        /* Note that if the fence-array was created in signal-on-any mode,
         * we should *not* decompose it into its individual fences. However,
@@ -731,21 +755,46 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
         * amdgpu and we should not see any incoming fence-array from
         * sync-file being in signal-on-any mode.
         */
+       if (dma_fence_is_array(fence)) {
+               struct dma_fence_array *array = to_dma_fence_array(fence);
+
+               child = array->fences;
+               nchild = array->num_fences;
+               GEM_BUG_ON(!nchild);
+       }
+
+       do {
+               fence = *child++;
+               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+                       continue;
+
+               /*
+                * Requests on the same timeline are explicitly ordered, along
+                * with their dependencies, by i915_add_request() which ensures
+                * that requests are submitted in-order through each ring.
+                */
+               if (fence->context == req->fence.context)
+                       continue;
 
-       array = to_dma_fence_array(fence);
-       for (i = 0; i < array->num_fences; i++) {
-               struct dma_fence *child = array->fences[i];
+               /* Squash repeated waits to the same timelines */
+               if (fence->context != req->i915->mm.unordered_timeline &&
+                   intel_timeline_sync_is_later(req->timeline, fence))
+                       continue;
 
-               if (dma_fence_is_i915(child))
+               if (dma_fence_is_i915(fence))
                        ret = i915_gem_request_await_request(req,
-                                                            to_request(child));
+                                                            to_request(fence));
                else
-                       ret = i915_sw_fence_await_dma_fence(&req->submit,
-                                                           child, I915_FENCE_TIMEOUT,
+                       ret = i915_sw_fence_await_dma_fence(&req->submit, fence,
+                                                           I915_FENCE_TIMEOUT,
                                                            GFP_KERNEL);
                if (ret < 0)
                        return ret;
-       }
+
+               /* Record the latest fence used against each timeline */
+               if (fence->context != req->i915->mm.unordered_timeline)
+                       intel_timeline_sync_set(req->timeline, fence);
+       } while (--nchild);
 
        return 0;
 }
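
The intel_timeline_sync_is_later()/intel_timeline_sync_set() pair used in the await path above comes from the new syncmap code added elsewhere in this series: per foreign fence context it remembers the newest seqno already awaited, so repeated waits on the same timeline can be squashed. A simplified flat-array sketch of that idea (not the actual, more compact implementation), using the usual wrap-safe seqno comparison:

    struct sync_entry {
            u64 context;    /* foreign dma_fence context id */
            u32 seqno;      /* newest seqno already awaited on it */
    };

    /* True if an equal-or-newer seqno on this context has already been
     * awaited, i.e. a new wait for (context, seqno) would be redundant. */
    static bool sync_is_later(const struct sync_entry *map, unsigned int count,
                              u64 context, u32 seqno)
    {
            unsigned int i;

            for (i = 0; i < count; i++)
                    if (map[i].context == context)
                            return (s32)(map[i].seqno - seqno) >= 0;

            return false;
    }
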
index 129c58bb4805509ee708830458e766cfaa24237e..7b7c84369d782e7be6020102d3687ff0093fe0da 100644 (file)
@@ -67,12 +67,18 @@ struct i915_dependency {
 struct i915_priotree {
        struct list_head signalers_list; /* those before us, we depend upon */
        struct list_head waiters_list; /* those after us, they depend upon us */
-       struct rb_node node;
+       struct list_head link;
        int priority;
 #define I915_PRIORITY_MAX 1024
+#define I915_PRIORITY_NORMAL 0
 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
 };
 
+struct i915_gem_capture_list {
+       struct i915_gem_capture_list *next;
+       struct i915_vma *vma;
+};
+
 /**
  * Request queue structure.
  *
@@ -167,6 +173,12 @@ struct drm_i915_gem_request {
         * error state dump only).
         */
        struct i915_vma *batch;
+       /** Additional buffers requested by userspace to be captured upon
+        * a GPU hang. The vma/obj on this list are protected by their
+        * active reference - all objects on this list must also be
+        * on the active_list (of their final request).
+        */
+       struct i915_gem_capture_list *capture_list;
        struct list_head active_list;
 
        /** Time at which this request was emitted, in jiffies. */
index 129ed303a6c46e2f856eb1abc84990079abefb65..0fd2b58ce47517b0ea82d754d0cbded57a8e98f5 100644 (file)
@@ -35,9 +35,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 {
-       switch (mutex_trylock_recursive(&dev->struct_mutex)) {
+       switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
        case MUTEX_TRYLOCK_FAILED:
                return false;
 
@@ -53,24 +53,29 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
        BUG();
 }
 
-static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock)
 {
        if (!unlock)
                return;
 
-       mutex_unlock(&dev->struct_mutex);
-
-       /* expedite the RCU grace period to free some request slabs */
-       synchronize_rcu_expedited();
+       mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
 {
        struct i915_vma *vma;
 
-       list_for_each_entry(vma, &obj->vma_list, obj_link)
+       list_for_each_entry(vma, &obj->vma_list, obj_link) {
+               /* Only GGTT vma may be permanently pinned, and are always
+                * at the start of the list. We can stop hunting as soon
+                * as we see a ppGTT vma.
+                */
+               if (!i915_vma_is_ggtt(vma))
+                       break;
+
                if (i915_vma_is_pinned(vma))
                        return true;
+       }
 
        return false;
 }
@@ -156,7 +161,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
        unsigned long count = 0;
        bool unlock;
 
-       if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock))
+       if (!shrinker_lock(dev_priv, &unlock))
                return 0;
 
        trace_i915_gem_shrink(dev_priv, target, flags);
@@ -244,7 +249,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 
        i915_gem_retire_requests(dev_priv);
 
-       i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
+       shrinker_unlock(dev_priv, unlock);
 
        return count;
 }
@@ -274,8 +279,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
                                I915_SHRINK_ACTIVE);
        intel_runtime_pm_put(dev_priv);
 
-       synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
-
        return freed;
 }
 
@@ -284,12 +287,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
        struct drm_i915_private *dev_priv =
                container_of(shrinker, struct drm_i915_private, mm.shrinker);
-       struct drm_device *dev = &dev_priv->drm;
        struct drm_i915_gem_object *obj;
        unsigned long count;
        bool unlock;
 
-       if (!i915_gem_shrinker_lock(dev, &unlock))
+       if (!shrinker_lock(dev_priv, &unlock))
                return 0;
 
        i915_gem_retire_requests(dev_priv);
@@ -304,7 +306,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
                        count += obj->base.size >> PAGE_SHIFT;
        }
 
-       i915_gem_shrinker_unlock(dev, unlock);
+       shrinker_unlock(dev_priv, unlock);
 
        return count;
 }
@@ -314,11 +316,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 {
        struct drm_i915_private *dev_priv =
                container_of(shrinker, struct drm_i915_private, mm.shrinker);
-       struct drm_device *dev = &dev_priv->drm;
        unsigned long freed;
        bool unlock;
 
-       if (!i915_gem_shrinker_lock(dev, &unlock))
+       if (!shrinker_lock(dev_priv, &unlock))
                return SHRINK_STOP;
 
        freed = i915_gem_shrink(dev_priv,
@@ -332,26 +333,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
                                         I915_SHRINK_BOUND |
                                         I915_SHRINK_UNBOUND);
 
-       i915_gem_shrinker_unlock(dev, unlock);
+       shrinker_unlock(dev_priv, unlock);
 
        return freed;
 }
 
-struct shrinker_lock_uninterruptible {
-       bool was_interruptible;
-       bool unlock;
-};
-
 static bool
-i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
-                                      struct shrinker_lock_uninterruptible *slu,
-                                      int timeout_ms)
+shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock,
+                             int timeout_ms)
 {
        unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
 
        do {
                if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
-                   i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
+                   shrinker_lock(dev_priv, unlock))
                        break;
 
                schedule_timeout_killable(1);
@@ -364,29 +359,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
                }
        } while (1);
 
-       slu->was_interruptible = dev_priv->mm.interruptible;
-       dev_priv->mm.interruptible = false;
        return true;
 }
 
-static void
-i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
-                                        struct shrinker_lock_uninterruptible *slu)
-{
-       dev_priv->mm.interruptible = slu->was_interruptible;
-       i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
-}
-
 static int
 i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 {
        struct drm_i915_private *dev_priv =
                container_of(nb, struct drm_i915_private, mm.oom_notifier);
-       struct shrinker_lock_uninterruptible slu;
        struct drm_i915_gem_object *obj;
        unsigned long unevictable, bound, unbound, freed_pages;
+       bool unlock;
 
-       if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+       if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
                return NOTIFY_DONE;
 
        freed_pages = i915_gem_shrink_all(dev_priv);
@@ -415,7 +400,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
                        bound += obj->base.size >> PAGE_SHIFT;
        }
 
-       i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+       shrinker_unlock(dev_priv, unlock);
 
        if (freed_pages || unbound || bound)
                pr_info("Purging GPU memory, %lu pages freed, "
@@ -435,12 +420,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 {
        struct drm_i915_private *dev_priv =
                container_of(nb, struct drm_i915_private, mm.vmap_notifier);
-       struct shrinker_lock_uninterruptible slu;
        struct i915_vma *vma, *next;
        unsigned long freed_pages = 0;
+       bool unlock;
        int ret;
 
-       if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+       if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
                return NOTIFY_DONE;
 
        /* Force everything onto the inactive lists */
@@ -465,7 +450,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
        }
 
 out:
-       i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+       shrinker_unlock(dev_priv, unlock);
 
        *(unsigned long *)ptr += freed_pages;
        return NOTIFY_DONE;
index f3abdc27c5dd16171dd0d5b00560148d30ab3aca..681db6083f4dac0a37336c9b85238c338723bb8b 100644 (file)
@@ -414,12 +414,10 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
                return 0;
        }
 
-#ifdef CONFIG_INTEL_IOMMU
-       if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) {
+       if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) {
                DRM_INFO("DMAR active, disabling use of stolen memory\n");
                return 0;
        }
-#endif
 
        if (ggtt->stolen_size == 0)
                return 0;
index b596ca7ee058cc7a02dbf1dd3073593e956548a8..c597ce277a043b34a1a0f8c6a9e5f14a71bbd62c 100644 (file)
  */
 
 #include "i915_drv.h"
+#include "i915_syncmap.h"
+
+static void __intel_timeline_init(struct intel_timeline *tl,
+                                 struct i915_gem_timeline *parent,
+                                 u64 context,
+                                 struct lock_class_key *lockclass,
+                                 const char *lockname)
+{
+       tl->fence_context = context;
+       tl->common = parent;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
+#else
+       spin_lock_init(&tl->lock);
+#endif
+       init_request_active(&tl->last_request, NULL);
+       INIT_LIST_HEAD(&tl->requests);
+       i915_syncmap_init(&tl->sync);
+}
+
+static void __intel_timeline_fini(struct intel_timeline *tl)
+{
+       GEM_BUG_ON(!list_empty(&tl->requests));
+
+       i915_syncmap_free(&tl->sync);
+}
 
 static int __i915_gem_timeline_init(struct drm_i915_private *i915,
                                    struct i915_gem_timeline *timeline,
@@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 
        lockdep_assert_held(&i915->drm.struct_mutex);
 
+       /*
+        * Ideally we want a set of engines on a single leaf as we expect
+        * to mostly be tracking synchronisation between engines. It is not
+        * a huge issue if this is not the case, but we may want to mitigate
+        * any page crossing penalties if they become an issue.
+        */
+       BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
+
        timeline->i915 = i915;
        timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
        if (!timeline->name)
@@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 
        /* Called during early_init before we know how many engines there are */
        fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
-       for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-               struct intel_timeline *tl = &timeline->engine[i];
-
-               tl->fence_context = fences++;
-               tl->common = timeline;
-#ifdef CONFIG_DEBUG_SPINLOCK
-               __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
-#else
-               spin_lock_init(&tl->lock);
-#endif
-               init_request_active(&tl->last_request, NULL);
-               INIT_LIST_HEAD(&tl->requests);
-       }
+       for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+               __intel_timeline_init(&timeline->engine[i],
+                                     timeline, fences++,
+                                     lockclass, lockname);
 
        return 0;
 }
@@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
                                        &class, "&global_timeline->lock");
 }
 
+/**
+ * i915_gem_timelines_mark_idle - called when the driver idles
+ * @i915: the drm_i915_private device
+ *
+ * When the driver is completely idle, we know that all of our sync points
+ * have been signaled and our tracking is then entirely redundant. Any request
+ * to wait upon an older sync point will be completed instantly as we know
+ * the fence is signaled and therefore we will not even look them up in the
+ * sync point map.
+ */
+void i915_gem_timelines_mark_idle(struct drm_i915_private *i915)
+{
+       struct i915_gem_timeline *timeline;
+       int i;
+
+       lockdep_assert_held(&i915->drm.struct_mutex);
+
+       list_for_each_entry(timeline, &i915->gt.timelines, link) {
+               for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
+                       struct intel_timeline *tl = &timeline->engine[i];
+
+                       /*
+                        * All known fences are completed so we can scrap
+                        * the current sync point tracking and start afresh,
+                        * any attempt to wait upon a previous sync point
+                        * will be skipped as the fence was signaled.
+                        */
+                       i915_syncmap_free(&tl->sync);
+               }
+       }
+}
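
A hypothetical caller sketch of the new mark-idle hook (the idle path itself is not part of this diff, and the idle condition shown is only an assumption), invoked under struct_mutex once everything has retired:

static void mark_idle_example(struct drm_i915_private *i915)
{
        lockdep_assert_held(&i915->drm.struct_mutex);

        /* Assumed idle condition: no requests outstanding anywhere */
        if (!i915->gt.active_requests)
                i915_gem_timelines_mark_idle(i915);
}
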
+
 void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
 {
        int i;
 
        lockdep_assert_held(&timeline->i915->drm.struct_mutex);
 
-       for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-               struct intel_timeline *tl = &timeline->engine[i];
-
-               GEM_BUG_ON(!list_empty(&tl->requests));
-       }
+       for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+               __intel_timeline_fini(&timeline->engine[i]);
 
        list_del(&timeline->link);
        kfree(timeline->name);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_timeline.c"
+#include "selftests/i915_gem_timeline.c"
+#endif
index 6c53e14cab2a4d307b7eb4e13aafea0a71ed458f..bfb5eb94c64d0eadacfa48a999110a0f96cdbed9 100644 (file)
@@ -27,7 +27,9 @@
 
 #include <linux/list.h>
 
+#include "i915_utils.h"
 #include "i915_gem_request.h"
+#include "i915_syncmap.h"
 
 struct i915_gem_timeline;
 
@@ -55,7 +57,25 @@ struct intel_timeline {
         * struct_mutex.
         */
        struct i915_gem_active last_request;
-       u32 sync_seqno[I915_NUM_ENGINES];
+
+       /**
+        * We track the most recent seqno that we wait on in every context so
+        * that we only have to emit a new await and dependency on a more
+        * recent sync point. As the contexts may be executed out-of-order, we
+        * have to track each individually and can not rely on an absolute
+        * have to track each individually and cannot rely on an absolute
+        * (i.e. when the driver is idle), we know that the syncmap is
+        * redundant and we can discard it without loss of generality.
+        */
+       struct i915_syncmap *sync;
+       /**
+        * Separately to the inter-context seqno map above, we track the last
+        * barrier (e.g. semaphore wait) to the global engine timelines. Note
+        * that this tracks global_seqno rather than the context.seqno, and
+        * so it is subject to the limitations of hw wraparound and that we
+        * may need to revoke global_seqno (on pre-emption).
+        */
+       u32 global_sync[I915_NUM_ENGINES];
 
        struct i915_gem_timeline *common;
 };
@@ -73,6 +93,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
                           struct i915_gem_timeline *tl,
                           const char *name);
 int i915_gem_timeline_init__global(struct drm_i915_private *i915);
+void i915_gem_timelines_mark_idle(struct drm_i915_private *i915);
 void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
 
+static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
+                                           u64 context, u32 seqno)
+{
+       return i915_syncmap_set(&tl->sync, context, seqno);
+}
+
+static inline int intel_timeline_sync_set(struct intel_timeline *tl,
+                                         const struct dma_fence *fence)
+{
+       return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
+}
+
+static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
+                                                 u64 context, u32 seqno)
+{
+       return i915_syncmap_is_later(&tl->sync, context, seqno);
+}
+
+static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
+                                               const struct dma_fence *fence)
+{
+       return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
+}
+
 #endif
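
A small self-test-style sketch of the semantics assumed by the helpers above: once a (context, seqno) pair has been recorded, queries for the same context with an equal or older seqno report "later", while newer seqnos and unseen contexts do not. The function name and the exact equal-seqno behaviour are assumptions for illustration, not taken from the syncmap implementation itself.

static void __maybe_unused syncmap_semantics_example(void)
{
        struct i915_syncmap *sync;

        i915_syncmap_init(&sync);

        if (i915_syncmap_set(&sync, 1 /* context */, 10 /* seqno */) == 0) {
                WARN_ON(!i915_syncmap_is_later(&sync, 1, 10)); /* equal */
                WARN_ON(!i915_syncmap_is_later(&sync, 1, 5));  /* older */
                WARN_ON(i915_syncmap_is_later(&sync, 1, 11));  /* newer */
                WARN_ON(i915_syncmap_is_later(&sync, 2, 1));   /* unseen ctx */
        }

        i915_syncmap_free(&sync);
}
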
index 8effc59f5cb572651bd7f98fba747821bef0dcc8..e18f350bc364253b7228c3d913f1797ce7b1f919 100644 (file)
@@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
                        print_error_obj(m, dev_priv->engine[i], NULL, obj);
                }
 
+               for (j = 0; j < ee->user_bo_count; j++)
+                       print_error_obj(m, dev_priv->engine[i],
+                                       "user", ee->user_bo[j]);
+
                if (ee->num_requests) {
                        err_printf(m, "%s --- %d requests\n",
                                   dev_priv->engine[i]->name,
@@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref)
 {
        struct i915_gpu_state *error =
                container_of(error_ref, typeof(*error), ref);
-       int i;
+       long i, j;
 
        for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
                struct drm_i915_error_engine *ee = &error->engine[i];
 
+               for (j = 0; j < ee->user_bo_count; j++)
+                       i915_error_object_free(ee->user_bo[j]);
+               kfree(ee->user_bo);
+
                i915_error_object_free(ee->batchbuffer);
                i915_error_object_free(ee->wa_batchbuffer);
                i915_error_object_free(ee->ringbuffer);
@@ -1316,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 static void error_record_engine_execlists(struct intel_engine_cs *engine,
                                          struct drm_i915_error_engine *ee)
 {
+       const struct execlist_port *port = engine->execlist_port;
        unsigned int n;
 
-       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-               if (engine->execlist_port[n].request)
-                       record_request(engine->execlist_port[n].request,
-                                      &ee->execlist[n]);
+       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+               struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+               if (!rq)
+                       break;
+
+               record_request(rq, &ee->execlist[n]);
+       }
 }
 
 static void record_context(struct drm_i915_error_context *e,
@@ -1346,6 +1359,35 @@ static void record_context(struct drm_i915_error_context *e,
        e->active = ctx->active_count;
 }
 
+static void request_record_user_bo(struct drm_i915_gem_request *request,
+                                  struct drm_i915_error_engine *ee)
+{
+       struct i915_gem_capture_list *c;
+       struct drm_i915_error_object **bo;
+       long count;
+
+       count = 0;
+       for (c = request->capture_list; c; c = c->next)
+               count++;
+
+       bo = NULL;
+       if (count)
+               bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
+       if (!bo)
+               return;
+
+       count = 0;
+       for (c = request->capture_list; c; c = c->next) {
+               bo[count] = i915_error_object_create(request->i915, c->vma);
+               if (!bo[count])
+                       break;
+               count++;
+       }
+
+       ee->user_bo = bo;
+       ee->user_bo_count = count;
+}
+
 static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
                                  struct i915_gpu_state *error)
 {
@@ -1392,6 +1434,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
                                ee->wa_batchbuffer =
                                        i915_error_object_create(dev_priv,
                                                                 engine->scratch);
+                       request_record_user_bo(request, ee);
 
                        ee->ctx =
                                i915_error_object_create(dev_priv,
@@ -1560,6 +1603,9 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
                error->done_reg = I915_READ(DONE_REG);
        }
 
+       if (INTEL_GEN(dev_priv) >= 5)
+               error->ccid = I915_READ(CCID);
+
        /* 3: Feature specific registers */
        if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
                error->gam_ecochk = I915_READ(GAM_ECOCHK);
@@ -1567,9 +1613,6 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
        }
 
        /* 4: Everything else */
-       if (HAS_HW_CONTEXTS(dev_priv))
-               error->ccid = I915_READ(CCID);
-
        if (INTEL_GEN(dev_priv) >= 8) {
                error->ier = I915_READ(GEN8_DE_MISC_IER);
                for (i = 0; i < 4; i++)
index 1642fff9cf135d5edbe85864d1b327d59002c026..e6e0c6ef1084b99b86c50849773925a9fbe065c1 100644 (file)
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
        GEM_BUG_ON(freespace < wqi_size);
 
        /* The GuC firmware wants the tail index in QWords, not bytes */
-       tail = rq->tail;
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       tail >>= 3;
+       tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
        GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
        /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
@@ -616,12 +614,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
        b_ret = guc_ring_doorbell(client);
 
        client->submissions[engine_id] += 1;
-       client->retcode = b_ret;
-       if (b_ret)
-               client->b_fail += 1;
-
-       guc->submissions[engine_id] += 1;
-       guc->last_seqno[engine_id] = rq->global_seqno;
 
        spin_unlock_irqrestore(&client->wq_lock, flags);
 }
@@ -651,47 +643,68 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
        trace_dma_fence_enable_signal(&rq->fence);
 
        spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
-       intel_engine_enable_signaling(rq);
+       intel_engine_enable_signaling(rq, true);
        spin_unlock(&rq->lock);
 }
 
+static void port_assign(struct execlist_port *port,
+                       struct drm_i915_gem_request *rq)
+{
+       GEM_BUG_ON(rq == port_request(port));
+
+       if (port_isset(port))
+               i915_gem_request_put(port_request(port));
+
+       port_set(port, i915_gem_request_get(rq));
+       nested_enable_signaling(rq);
+}
+
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
        struct execlist_port *port = engine->execlist_port;
-       struct drm_i915_gem_request *last = port[0].request;
+       struct drm_i915_gem_request *last = port_request(port);
        struct rb_node *rb;
        bool submit = false;
 
        spin_lock_irq(&engine->timeline->lock);
        rb = engine->execlist_first;
+       GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
        while (rb) {
-               struct drm_i915_gem_request *rq =
-                       rb_entry(rb, typeof(*rq), priotree.node);
-
-               if (last && rq->ctx != last->ctx) {
-                       if (port != engine->execlist_port)
-                               break;
-
-                       i915_gem_request_assign(&port->request, last);
-                       nested_enable_signaling(last);
-                       port++;
+               struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+               struct drm_i915_gem_request *rq, *rn;
+
+               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+                       if (last && rq->ctx != last->ctx) {
+                               if (port != engine->execlist_port) {
+                                       __list_del_many(&p->requests,
+                                                       &rq->priotree.link);
+                                       goto done;
+                               }
+
+                               if (submit)
+                                       port_assign(port, last);
+                               port++;
+                       }
+
+                       INIT_LIST_HEAD(&rq->priotree.link);
+                       rq->priotree.priority = INT_MAX;
+
+                       i915_guc_submit(rq);
+                       trace_i915_gem_request_in(rq, port_index(port, engine));
+                       last = rq;
+                       submit = true;
                }
 
                rb = rb_next(rb);
-               rb_erase(&rq->priotree.node, &engine->execlist_queue);
-               RB_CLEAR_NODE(&rq->priotree.node);
-               rq->priotree.priority = INT_MAX;
-
-               i915_guc_submit(rq);
-               trace_i915_gem_request_in(rq, port - engine->execlist_port);
-               last = rq;
-               submit = true;
-       }
-       if (submit) {
-               i915_gem_request_assign(&port->request, last);
-               nested_enable_signaling(last);
-               engine->execlist_first = rb;
+               rb_erase(&p->node, &engine->execlist_queue);
+               INIT_LIST_HEAD(&p->requests);
+               if (p->priority != I915_PRIORITY_NORMAL)
+                       kmem_cache_free(engine->i915->priorities, p);
        }
+done:
+       engine->execlist_first = rb;
+       if (submit)
+               port_assign(port, last);
        spin_unlock_irq(&engine->timeline->lock);
 
        return submit;
@@ -705,17 +718,19 @@ static void i915_guc_irq_handler(unsigned long data)
        bool submit;
 
        do {
-               rq = port[0].request;
+               rq = port_request(&port[0]);
                while (rq && i915_gem_request_completed(rq)) {
                        trace_i915_gem_request_out(rq);
                        i915_gem_request_put(rq);
-                       port[0].request = port[1].request;
-                       port[1].request = NULL;
-                       rq = port[0].request;
+
+                       port[0] = port[1];
+                       memset(&port[1], 0, sizeof(port[1]));
+
+                       rq = port_request(&port[0]);
                }
 
                submit = false;
-               if (!port[1].request)
+               if (!port_count(&port[1]))
                        submit = i915_guc_dequeue(engine);
        } while (submit);
 }
@@ -1053,8 +1068,7 @@ static int guc_ads_create(struct intel_guc *guc)
                dev_priv->engine[RCS]->status_page.ggtt_offset;
 
        for_each_engine(engine, dev_priv, id)
-               blob->ads.eng_state_size[engine->guc_id] =
-                       intel_lr_context_size(engine);
+               blob->ads.eng_state_size[engine->guc_id] = engine->context_size;
 
        base = guc_ggtt_offset(vma);
        blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
index 04493ef1d2f7a5c4108c697cfb4015b7e9c4ce33..7b7f55a28eec2038bcb94cb25353d8f0eaed386f 100644 (file)
@@ -1200,7 +1200,7 @@ out:
 static void ivybridge_parity_work(struct work_struct *work)
 {
        struct drm_i915_private *dev_priv =
-               container_of(work, struct drm_i915_private, l3_parity.error_work);
+               container_of(work, typeof(*dev_priv), l3_parity.error_work);
        u32 error_status, row, bank, subbank;
        char *parity_event[6];
        uint32_t misccpctl;
@@ -1317,14 +1317,16 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
                ivybridge_parity_error_irq_handler(dev_priv, gt_iir);
 }
 
-static __always_inline void
+static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
        bool tasklet = false;
 
        if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-               set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-               tasklet = true;
+               if (port_count(&engine->execlist_port[0])) {
+                       __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+                       tasklet = true;
+               }
        }
 
        if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
@@ -2917,7 +2919,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
        u32 pipestat_mask;
        u32 enable_mask;
        enum pipe pipe;
-       u32 val;
 
        pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV |
                        PIPE_CRC_DONE_INTERRUPT_STATUS;
@@ -2928,18 +2929,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 
        enable_mask = I915_DISPLAY_PORT_INTERRUPT |
                I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
-               I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;
+               I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
+               I915_LPE_PIPE_A_INTERRUPT |
+               I915_LPE_PIPE_B_INTERRUPT;
+
        if (IS_CHERRYVIEW(dev_priv))
-               enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT;
+               enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT |
+                       I915_LPE_PIPE_C_INTERRUPT;
 
        WARN_ON(dev_priv->irq_mask != ~0);
 
-       val = (I915_LPE_PIPE_A_INTERRUPT |
-               I915_LPE_PIPE_B_INTERRUPT |
-               I915_LPE_PIPE_C_INTERRUPT);
-
-       enable_mask |= val;
-
        dev_priv->irq_mask = ~enable_mask;
 
        GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask);
@@ -4197,11 +4196,15 @@ static void i965_irq_uninstall(struct drm_device * dev)
 void intel_irq_init(struct drm_i915_private *dev_priv)
 {
        struct drm_device *dev = &dev_priv->drm;
+       int i;
 
        intel_hpd_init_work(dev_priv);
 
        INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
+
        INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
+       for (i = 0; i < MAX_L3_SLICES; ++i)
+               dev_priv->l3_parity.remap_info[i] = NULL;
 
        if (HAS_GUC_SCHED(dev_priv))
                dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
@@ -4326,6 +4329,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
        }
 }
 
+/**
+ * intel_irq_fini - deinitializes IRQ support
+ * @i915: i915 device instance
+ *
+ * This function deinitializes all the IRQ support.
+ */
+void intel_irq_fini(struct drm_i915_private *i915)
+{
+       int i;
+
+       for (i = 0; i < MAX_L3_SLICES; ++i)
+               kfree(i915->l3_parity.remap_info[i]);
+}
+
 /**
  * intel_irq_install - enables the hardware interrupt
  * @dev_priv: i915 device instance
index f87b0c4e564d8b85de91e93f7a8d9a6e6f219b61..f80db2ccd92fa040e97e8478ed2ec2097afd0b92 100644 (file)
@@ -220,7 +220,6 @@ static const struct intel_device_info intel_ironlake_m_info = {
        .has_rc6 = 1, \
        .has_rc6p = 1, \
        .has_gmbus_irq = 1, \
-       .has_hw_contexts = 1, \
        .has_aliasing_ppgtt = 1, \
        GEN_DEFAULT_PIPEOFFSETS, \
        CURSOR_OFFSETS
@@ -245,7 +244,6 @@ static const struct intel_device_info intel_sandybridge_m_info = {
        .has_rc6 = 1, \
        .has_rc6p = 1, \
        .has_gmbus_irq = 1, \
-       .has_hw_contexts = 1, \
        .has_aliasing_ppgtt = 1, \
        .has_full_ppgtt = 1, \
        GEN_DEFAULT_PIPEOFFSETS, \
@@ -280,7 +278,6 @@ static const struct intel_device_info intel_valleyview_info = {
        .has_runtime_pm = 1,
        .has_rc6 = 1,
        .has_gmbus_irq = 1,
-       .has_hw_contexts = 1,
        .has_gmch_display = 1,
        .has_hotplug = 1,
        .has_aliasing_ppgtt = 1,
@@ -340,7 +337,6 @@ static const struct intel_device_info intel_cherryview_info = {
        .has_resource_streamer = 1,
        .has_rc6 = 1,
        .has_gmbus_irq = 1,
-       .has_hw_contexts = 1,
        .has_logical_ring_contexts = 1,
        .has_gmch_display = 1,
        .has_aliasing_ppgtt = 1,
@@ -387,7 +383,6 @@ static const struct intel_device_info intel_skylake_gt3_info = {
        .has_rc6 = 1, \
        .has_dp_mst = 1, \
        .has_gmbus_irq = 1, \
-       .has_hw_contexts = 1, \
        .has_logical_ring_contexts = 1, \
        .has_guc = 1, \
        .has_decoupled_mmio = 1, \
index 060b171480d550596e17f96cb2022c95001ff56f..85269bcc8372c623021d278942e1cc3461cbeb95 100644 (file)
 
 #define OA_TAKEN(tail, head)   ((tail - head) & (OA_BUFFER_SIZE - 1))
 
-/* There's a HW race condition between OA unit tail pointer register updates and
+/**
+ * DOC: OA Tail Pointer Race
+ *
+ * There's a HW race condition between OA unit tail pointer register updates and
  * writes to memory whereby the tail pointer can sometimes get ahead of what's
- * been written out to the OA buffer so far.
+ * been written out to the OA buffer so far (in terms of what's visible to the
+ * CPU).
+ *
+ * Although this can be observed explicitly while copying reports to userspace
+ * by checking for a zeroed report-id field in tail reports, we want to account
+ * for this earlier, as part of the _oa_buffer_check to avoid lots of redundant
+ * read() attempts.
+ *
+ * In effect we define a tail pointer for reading that lags the real tail
+ * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
+ * time for the corresponding reports to become visible to the CPU.
+ *
+ * To manage this we actually track two tail pointers:
+ *  1) An 'aging' tail with an associated timestamp that is tracked until we
+ *     can trust the corresponding data is visible to the CPU; at which point
+ *     it is considered 'aged'.
+ *  2) An 'aged' tail that can be used for read()ing.
+ *
+ * The two separate pointers let us decouple read()s from tail pointer aging.
+ *
+ * The tail pointers are checked and updated at a limited rate within a hrtimer
+ * callback (the same callback that is used for delivering POLLIN events)
  *
- * Although this can be observed explicitly by checking for a zeroed report-id
- * field in tail reports, it seems preferable to account for this earlier e.g.
- * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles
- * in this situation.
+ * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
+ * indicates that an updated tail pointer is needed.
  *
- * To give time for the most recent reports to land before they may be copied to
- * userspace, the driver operates as if the tail pointer effectively lags behind
- * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
- * based on this constant in nanoseconds, the current OA sampling exponent
- * and current report size.
+ * Most of the implementation details for this workaround are in
+ * gen7_oa_buffer_check_unlocked() and gen7_append_oa_reports()
  *
- * There is also a fallback check while reading to simply skip over reports with
- * a zeroed report-id.
+ * Note for posterity: previously the driver used to define an effective tail
+ * pointer that lagged the real pointer by a 'tail margin' measured in bytes
+ * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
+ * This was flawed considering that the OA unit may also automatically generate
+ * non-periodic reports (such as on context switch) or the OA unit may be
+ * enabled without any periodic sampling.
  */
 #define OA_TAIL_MARGIN_NSEC    100000ULL
+#define INVALID_TAIL_PTR       0xffffffff
 
 /* frequency for checking whether the OA unit has written new reports to the
  * circular OA buffer...
@@ -308,27 +332,121 @@ struct perf_open_properties {
        int oa_period_exponent;
 };
 
-/* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
+/**
+ * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state
+ * @dev_priv: i915 device instance
+ *
+ * This is either called via fops (for blocking reads in user ctx) or the poll
+ * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
+ * if there is data available for userspace to read.
  *
- * It's safe to read OA config state here unlocked, assuming that this is only
- * called while the stream is enabled, while the global OA configuration can't
- * be modified.
+ * This function is central to providing a workaround for the OA unit tail
+ * pointer having a race with respect to what data is visible to the CPU.
+ * It is responsible for reading tail pointers from the hardware and giving
+ * the pointers time to 'age' before they are made available for reading.
+ * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
  *
- * Note: we don't lock around the head/tail reads even though there's the slim
- * possibility of read() fop errors forcing a re-init of the OA buffer
- * pointers.  A race here could result in a false positive !empty status which
- * is acceptable.
+ * Besides returning true when there is data available to read() this function
+ * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
+ * and .aged_tail_idx state used for reading.
+ *
+ * Note: It's safe to read OA config state here unlocked, assuming that this is
+ * only called while the stream is enabled, while the global OA configuration
+ * can't be modified.
+ *
+ * Returns: %true if the OA buffer contains data, else %false
  */
-static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
+static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
 {
        int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-       u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
-       u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
-       u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
-       u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+       unsigned long flags;
+       unsigned int aged_idx;
+       u32 oastatus1;
+       u32 head, hw_tail, aged_tail, aging_tail;
+       u64 now;
+
+       /* We have to consider the (unlikely) possibility that read() errors
+        * could result in an OA buffer reset which might reset the head,
+        * tails[] and aged_tail state.
+        */
+       spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+
+       /* NB: The head we observe here might effectively be a little out of
+        * date (between head and tails[aged_idx].offset if there is currently
+        * date (between head and tails[aged_idx].offset) if there is currently
+        */
+       head = dev_priv->perf.oa.oa_buffer.head;
+
+       aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
+       aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
+       aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
 
-       return OA_TAKEN(tail, head) <
-               dev_priv->perf.oa.tail_margin + report_size;
+       oastatus1 = I915_READ(GEN7_OASTATUS1);
+       hw_tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+
+       /* The tail pointer increases in 64 byte increments,
+        * not in report_size steps...
+        */
+       hw_tail &= ~(report_size - 1);
+
+       now = ktime_get_mono_fast_ns();
+
+       /* Update the aged tail
+        *
+        * Flip the tail pointer available for read()s once the aging tail is
+        * old enough to trust that the corresponding data will be visible to
+        * the CPU...
+        *
+        * Do this before updating the aging pointer in case we may be able to
+        * immediately start aging a new pointer too (if new data has become
+        * available) without needing to wait for a later hrtimer callback.
+        */
+       if (aging_tail != INVALID_TAIL_PTR &&
+           ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
+            OA_TAIL_MARGIN_NSEC)) {
+               aged_idx ^= 1;
+               dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
+
+               aged_tail = aging_tail;
+
+               /* Mark that we need a new pointer to start aging... */
+               dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
+               aging_tail = INVALID_TAIL_PTR;
+       }
+
+       /* Update the aging tail
+        *
+        * We throttle aging tail updates until we have a new tail that
+        * represents >= one report more data than is already available for
+        * reading. This ensures there will be enough data for a successful
+        * read once this new pointer has aged and ensures we will give the new
+        * pointer time to age.
+        */
+       if (aging_tail == INVALID_TAIL_PTR &&
+           (aged_tail == INVALID_TAIL_PTR ||
+            OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
+               struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
+               u32 gtt_offset = i915_ggtt_offset(vma);
+
+               /* Be paranoid and do a bounds check on the pointer read back
+                * from hardware, just in case some spurious hardware condition
+                * could put the tail out of bounds...
+                */
+               if (hw_tail >= gtt_offset &&
+                   hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
+                       dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
+                               aging_tail = hw_tail;
+                       dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
+               } else {
+                       DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
+                                 hw_tail);
+               }
+       }
+
+       spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+
+       return aged_tail == INVALID_TAIL_PTR ?
+               false : OA_TAKEN(aged_tail, head) >= report_size;
 }
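
A reduced sketch (not from the patch) of the two-tail aging decision implemented above, with the hardware read, locking and bounds checking stripped away; the struct and timestamp argument are assumptions for illustration only:

struct oa_tails_example {
        u32 aged;        /* safe for read() */
        u32 aging;       /* waiting to become visible to the CPU */
        u64 aging_start; /* timestamp when aging began, in ns */
};

static void oa_age_tails_example(struct oa_tails_example *t,
                                 u32 hw_tail, u64 now)
{
        /* Promote the aging tail once it has aged long enough */
        if (t->aging != INVALID_TAIL_PTR &&
            now - t->aging_start > OA_TAIL_MARGIN_NSEC) {
                t->aged = t->aging;
                t->aging = INVALID_TAIL_PTR;
        }

        /* Start aging a new hardware tail once it offers new data */
        if (t->aging == INVALID_TAIL_PTR && hw_tail != t->aged) {
                t->aging = hw_tail;
                t->aging_start = now;
        }
}
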
 
 /**
@@ -421,8 +539,6 @@ static int append_oa_sample(struct i915_perf_stream *stream,
  * @buf: destination buffer given by userspace
  * @count: the number of bytes userspace wants to read
  * @offset: (inout): the current position for writing into @buf
- * @head_ptr: (inout): the current oa buffer cpu read position
- * @tail: the current oa buffer gpu write position
  *
  * Notably any error condition resulting in a short read (-%ENOSPC or
  * -%EFAULT) will be returned even though one or more records may
@@ -431,7 +547,7 @@ static int append_oa_sample(struct i915_perf_stream *stream,
  * userspace.
  *
  * Note: reports are consumed from the head, and appended to the
- * tail, so the head chases the tail?... If you think that's mad
+ * tail, so the tail chases the head?... If you think that's mad
  * and back-to-front you're not alone, but this follows the
  * Gen PRM naming convention.
  *
@@ -440,57 +556,55 @@ static int append_oa_sample(struct i915_perf_stream *stream,
 static int gen7_append_oa_reports(struct i915_perf_stream *stream,
                                  char __user *buf,
                                  size_t count,
-                                 size_t *offset,
-                                 u32 *head_ptr,
-                                 u32 tail)
+                                 size_t *offset)
 {
        struct drm_i915_private *dev_priv = stream->dev_priv;
        int report_size = dev_priv->perf.oa.oa_buffer.format_size;
        u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
-       int tail_margin = dev_priv->perf.oa.tail_margin;
        u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
        u32 mask = (OA_BUFFER_SIZE - 1);
-       u32 head;
+       size_t start_offset = *offset;
+       unsigned long flags;
+       unsigned int aged_tail_idx;
+       u32 head, tail;
        u32 taken;
        int ret = 0;
 
        if (WARN_ON(!stream->enabled))
                return -EIO;
 
-       head = *head_ptr - gtt_offset;
-       tail -= gtt_offset;
+       spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
-       /* The OA unit is expected to wrap the tail pointer according to the OA
-        * buffer size and since we should never write a misaligned head
-        * pointer we don't expect to read one back either...
-        */
-       if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE ||
-           head % report_size) {
-               DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n",
-                         head, tail);
-               dev_priv->perf.oa.ops.oa_disable(dev_priv);
-               dev_priv->perf.oa.ops.oa_enable(dev_priv);
-               *head_ptr = I915_READ(GEN7_OASTATUS2) &
-                       GEN7_OASTATUS2_HEAD_MASK;
-               return -EIO;
-       }
+       head = dev_priv->perf.oa.oa_buffer.head;
+       aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
+       tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
 
+       spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
-       /* The tail pointer increases in 64 byte increments, not in report_size
-        * steps...
+       /* An invalid tail pointer here means we're still waiting for the poll
+        * hrtimer callback to give us a pointer
         */
-       tail &= ~(report_size - 1);
+       if (tail == INVALID_TAIL_PTR)
+               return -EAGAIN;
 
-       /* Move the tail pointer back by the current tail_margin to account for
-        * the possibility that the latest reports may not have really landed
-        * in memory yet...
+       /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
+        * while indexing relative to oa_buf_base.
         */
+       head -= gtt_offset;
+       tail -= gtt_offset;
 
-       if (OA_TAKEN(tail, head) < report_size + tail_margin)
-               return -EAGAIN;
+       /* An out of bounds or misaligned head or tail pointer implies a driver
+        * bug since we validate + align the tail pointers we read from the
+        * hardware and we are in full control of the head pointer which should
+        * only be incremented by multiples of the report size (notably also
+        * all a power of two).
+        */
+       if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
+                     tail > OA_BUFFER_SIZE || tail % report_size,
+                     "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
+                     head, tail))
+               return -EIO;
 
-       tail -= tail_margin;
-       tail &= mask;
 
        for (/* none */;
             (taken = OA_TAKEN(tail, head));
@@ -518,7 +632,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
                 * copying it to userspace...
                 */
                if (report32[0] == 0) {
-                       DRM_NOTE("Skipping spurious, invalid OA report\n");
+                       if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
+                               DRM_NOTE("Skipping spurious, invalid OA report\n");
                        continue;
                }
 
@@ -535,7 +650,21 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
                report32[0] = 0;
        }
 
-       *head_ptr = gtt_offset + head;
+       if (start_offset != *offset) {
+               spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+
+               /* We removed the gtt_offset for the copy loop above, indexing
+                * relative to oa_buf_base so put back here...
+                */
+               head += gtt_offset;
+
+               I915_WRITE(GEN7_OASTATUS2,
+                          ((head & GEN7_OASTATUS2_HEAD_MASK) |
+                           OA_MEM_SELECT_GGTT));
+               dev_priv->perf.oa.oa_buffer.head = head;
+
+               spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+       }
 
        return ret;
 }
@@ -562,22 +691,14 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
                        size_t *offset)
 {
        struct drm_i915_private *dev_priv = stream->dev_priv;
-       int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-       u32 oastatus2;
        u32 oastatus1;
-       u32 head;
-       u32 tail;
        int ret;
 
        if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
                return -EIO;
 
-       oastatus2 = I915_READ(GEN7_OASTATUS2);
        oastatus1 = I915_READ(GEN7_OASTATUS1);
 
-       head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
-       tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
-
        /* XXX: On Haswell we don't have a safe way to clear oastatus1
         * bits while the OA unit is enabled (while the tail pointer
         * may be updated asynchronously) so we ignore status bits
@@ -616,11 +737,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
                dev_priv->perf.oa.ops.oa_disable(dev_priv);
                dev_priv->perf.oa.ops.oa_enable(dev_priv);
 
-               oastatus2 = I915_READ(GEN7_OASTATUS2);
                oastatus1 = I915_READ(GEN7_OASTATUS1);
-
-               head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
-               tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
        }
 
        if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
@@ -632,29 +749,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
                        GEN7_OASTATUS1_REPORT_LOST;
        }
 
-       ret = gen7_append_oa_reports(stream, buf, count, offset,
-                                    &head, tail);
-
-       /* All the report sizes are a power of two and the
-        * head should always be incremented by some multiple
-        * of the report size.
-        *
-        * A warning here, but notably if we later read back a
-        * misaligned pointer we will treat that as a bug since
-        * it could lead to a buffer overrun.
-        */
-       WARN_ONCE(head & (report_size - 1),
-                 "i915: Writing misaligned OA head pointer");
-
-       /* Note: we update the head pointer here even if an error
-        * was returned since the error may represent a short read
-        * where some some reports were successfully copied.
-        */
-       I915_WRITE(GEN7_OASTATUS2,
-                  ((head & GEN7_OASTATUS2_HEAD_MASK) |
-                   OA_MEM_SELECT_GGTT));
-
-       return ret;
+       return gen7_append_oa_reports(stream, buf, count, offset);
 }
 
 /**
@@ -679,14 +774,8 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
        if (!dev_priv->perf.oa.periodic)
                return -EIO;
 
-       /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
-        * just performs mmio reads of the OA buffer head + tail pointers and
-        * it's assumed we're handling some operation that implies the stream
-        * can't be destroyed until completion (such as a read()) that ensures
-        * the device + OA buffer can't disappear
-        */
        return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
-                                       !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
+                                       dev_priv->perf.oa.ops.oa_buffer_check(dev_priv));
 }
 
 /**
@@ -744,6 +833,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
        struct drm_i915_private *dev_priv = stream->dev_priv;
        struct intel_engine_cs *engine = dev_priv->engine[RCS];
+       struct intel_ring *ring;
        int ret;
 
        ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -755,9 +845,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
         *
         * NB: implied RCS engine...
         */
-       ret = engine->context_pin(engine, stream->ctx);
-       if (ret)
-               goto unlock;
+       ring = engine->context_pin(engine, stream->ctx);
+       mutex_unlock(&dev_priv->drm.struct_mutex);
+       if (IS_ERR(ring))
+               return PTR_ERR(ring);
 
        /* Explicitly track the ID (instead of calling i915_ggtt_offset()
         * on the fly) considering the difference with gen8+ and
@@ -766,10 +857,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
        dev_priv->perf.oa.specific_ctx_id =
                i915_ggtt_offset(stream->ctx->engine[engine->id].state);
 
-unlock:
-       mutex_unlock(&dev_priv->drm.struct_mutex);
-
-       return ret;
+       return 0;
 }
 
 /**
@@ -824,19 +912,36 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
                oa_put_render_ctx_id(stream);
 
        dev_priv->perf.oa.exclusive_stream = NULL;
+
+       if (dev_priv->perf.oa.spurious_report_rs.missed) {
+               DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
+                        dev_priv->perf.oa.spurious_report_rs.missed);
+       }
 }
 
 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 {
        u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+       unsigned long flags;
+
+       spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
        /* Pre-DevBDW: OABUFFER must be set with counters off,
         * before OASTATUS1, but after OASTATUS2
         */
        I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
+       dev_priv->perf.oa.oa_buffer.head = gtt_offset;
+
        I915_WRITE(GEN7_OABUFFER, gtt_offset);
+
        I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
 
+       /* Mark that we need updated tail pointers to read from... */
+       dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
+       dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+
+       spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+
        /* On Haswell we have to track which OASTATUS1 flags we've
         * already seen since they can't be cleared while periodic
         * sampling is enabled.
@@ -1094,12 +1199,6 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
                hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
 }
 
-static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
-{
-       return div_u64(1000000000ULL * (2ULL << exponent),
-                      dev_priv->perf.oa.timestamp_frequency);
-}
-
 static const struct i915_perf_stream_ops i915_oa_stream_ops = {
        .destroy = i915_oa_stream_destroy,
        .enable = i915_oa_stream_enable,
@@ -1173,6 +1272,26 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
                return -EINVAL;
        }
 
+       /* We set up some ratelimit state to potentially throttle any _NOTES
+        * about spurious, invalid OA reports which we don't forward to
+        * userspace.
+        *
+        * The initialization is associated with opening the stream (not driver
+        * init) considering we print a _NOTE about any throttling when closing
+        * the stream instead of waiting until driver _fini which no one would
+        * ever see.
+        *
+        * Using the same limiting factors as printk_ratelimit()
+        */
+       ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
+                            5 * HZ, 10);
+       /* Since we use a DRM_NOTE for spurious reports it would be
+        * inconsistent to let __ratelimit() automatically print a warning for
+        * throttling.
+        */
+       ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
+                           RATELIMIT_MSG_ON_RELEASE);
+
        stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
        format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
@@ -1190,20 +1309,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
        dev_priv->perf.oa.metrics_set = props->metrics_set;
 
        dev_priv->perf.oa.periodic = props->oa_periodic;
-       if (dev_priv->perf.oa.periodic) {
-               u32 tail;
-
+       if (dev_priv->perf.oa.periodic)
                dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
 
-               /* See comment for OA_TAIL_MARGIN_NSEC for details
-                * about this tail_margin...
-                */
-               tail = div64_u64(OA_TAIL_MARGIN_NSEC,
-                                oa_exponent_to_ns(dev_priv,
-                                                  props->oa_period_exponent));
-               dev_priv->perf.oa.tail_margin = (tail + 1) * format_size;
-       }
-
        if (stream->ctx) {
                ret = oa_get_render_ctx_id(stream);
                if (ret)
@@ -1352,7 +1460,15 @@ static ssize_t i915_perf_read(struct file *file,
                mutex_unlock(&dev_priv->perf.lock);
        }
 
-       if (ret >= 0) {
+       /* We allow the poll checking to sometimes report false positive POLLIN
+        * events where we might actually report EAGAIN on read() if there's
+        * not really any data available. In this situation though we don't
+        * want to enter a busy loop between poll() reporting a POLLIN event
+        * and read() returning -EAGAIN. Clearing the oa.pollin state here
+        * effectively ensures we back off until the next hrtimer callback
+        * before reporting another POLLIN event.
+        */
+       if (ret >= 0 || ret == -EAGAIN) {
                /* Maybe make ->pollin per-stream state if we support multiple
                 * concurrent streams in the future.
                 */
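
For illustration only, a hypothetical userspace consumer of an i915-perf stream fd would cope with the occasional false-positive POLLIN described above by treating EAGAIN from read() as a cue to go back to poll():

#include <errno.h>
#include <poll.h>
#include <unistd.h>

static ssize_t read_perf_records(int fd, void *buf, size_t len)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN };

        for (;;) {
                if (poll(&pfd, 1, -1) < 0)
                        return -1;

                ssize_t n = read(fd, buf, len);
                if (n >= 0)
                        return n;       /* records copied out */
                if (errno != EAGAIN)
                        return -1;      /* genuine error */
                /* spurious wakeup: wait for the next hrtimer-driven POLLIN */
        }
}
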
@@ -1368,7 +1484,7 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
                container_of(hrtimer, typeof(*dev_priv),
                             perf.oa.poll_check_timer);
 
-       if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
+       if (dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)) {
                dev_priv->perf.oa.pollin = true;
                wake_up(&dev_priv->perf.oa.poll_wq);
        }
@@ -1817,11 +1933,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
                        break;
                case DRM_I915_PERF_PROP_OA_FORMAT:
                        if (value == 0 || value >= I915_OA_FORMAT_MAX) {
-                               DRM_DEBUG("Invalid OA report format\n");
+                               DRM_DEBUG("Out-of-range OA report format %llu\n",
+                                         value);
                                return -EINVAL;
                        }
                        if (!dev_priv->perf.oa.oa_formats[value].size) {
-                               DRM_DEBUG("Invalid OA report format\n");
+                               DRM_DEBUG("Unsupported OA report format %llu\n",
+                                         value);
                                return -EINVAL;
                        }
                        props->oa_format = value;
@@ -2063,6 +2181,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
        INIT_LIST_HEAD(&dev_priv->perf.streams);
        mutex_init(&dev_priv->perf.lock);
        spin_lock_init(&dev_priv->perf.hook_lock);
+       spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
 
        dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
        dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
@@ -2070,10 +2189,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
        dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
        dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
        dev_priv->perf.oa.ops.read = gen7_oa_read;
-       dev_priv->perf.oa.ops.oa_buffer_is_empty =
-               gen7_oa_buffer_is_empty_fop_unlocked;
-
-       dev_priv->perf.oa.timestamp_frequency = 12500000;
+       dev_priv->perf.oa.ops.oa_buffer_check =
+               gen7_oa_buffer_check_unlocked;
 
        dev_priv->perf.oa.oa_formats = hsw_oa_formats;
 
index 11b12f4124920b07a65aaf7e02b81c8784bece46..89888adb9af170b5792593df231784234b0f3183 100644 (file)
@@ -85,6 +85,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define VECS_HW                3
 #define VCS2_HW                4
 
+/* Engine class */
+
+#define RENDER_CLASS           0
+#define VIDEO_DECODE_CLASS     1
+#define VIDEO_ENHANCEMENT_CLASS        2
+#define COPY_ENGINE_CLASS      3
+#define OTHER_CLASS            4
+
 /* PCI config space */
 
 #define MCHBAR_I915 0x44
@@ -3051,10 +3059,14 @@ enum skl_disp_power_wells {
 #define CLKCFG_FSB_667                                 (3 << 0)        /* hrawclk 166 */
 #define CLKCFG_FSB_800                                 (2 << 0)        /* hrawclk 200 */
 #define CLKCFG_FSB_1067                                        (6 << 0)        /* hrawclk 266 */
+#define CLKCFG_FSB_1067_ALT                            (0 << 0)        /* hrawclk 266 */
 #define CLKCFG_FSB_1333                                        (7 << 0)        /* hrawclk 333 */
-/* Note, below two are guess */
-#define CLKCFG_FSB_1600                                        (4 << 0)        /* hrawclk 400 */
-#define CLKCFG_FSB_1600_ALT                            (0 << 0)        /* hrawclk 400 */
+/*
+ * Note that at least on ELK the below value is reported for both the
+ * 333 and 400 MHz BIOS FSB settings, but given that the gmch datasheet
+ * lists only 200/266/333 MHz FSB as supported, let's decode it as 333 MHz.
+ */
+#define CLKCFG_FSB_1333_ALT                            (4 << 0)        /* hrawclk 333 */
 #define CLKCFG_FSB_MASK                                        (7 << 0)
 #define CLKCFG_MEM_533                                 (1 << 4)
 #define CLKCFG_MEM_667                                 (2 << 4)
@@ -3362,16 +3374,6 @@ enum skl_disp_power_wells {
 #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f)
 #define GEN7_CXT_TOTAL_SIZE(ctx_reg)   (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
                                         GEN7_CXT_VFSTATE_SIZE(ctx_reg))
-/* Haswell does have the CXT_SIZE register however it does not appear to be
- * valid. Now, docs explain in dwords what is in the context object. The full
- * size is 70720 bytes, however, the power context and execlist context will
- * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size, including the extra state required for the
- * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
- */
-#define HSW_CXT_TOTAL_SIZE             (17 * PAGE_SIZE)
-/* Same as Haswell, but 72064 bytes now. */
-#define GEN8_CXT_TOTAL_SIZE            (18 * PAGE_SIZE)
 
 enum {
        INTEL_ADVANCED_CONTEXT = 0,
@@ -5437,9 +5439,7 @@ enum {
 #define   CURSOR_MODE_128_ARGB_AX ((1 << 5) | CURSOR_MODE_128_32B_AX)
 #define   CURSOR_MODE_256_ARGB_AX ((1 << 5) | CURSOR_MODE_256_32B_AX)
 #define   CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX)
-#define   MCURSOR_PIPE_SELECT  (1 << 28)
-#define   MCURSOR_PIPE_A       0x00
-#define   MCURSOR_PIPE_B       (1 << 28)
+#define   MCURSOR_PIPE_SELECT(pipe)    ((pipe) << 28)
 #define   MCURSOR_GAMMA_ENABLE  (1 << 26)
 #define   CURSOR_ROTATE_180    (1<<15)
 #define   CURSOR_TRICKLE_FEED_DISABLE  (1 << 14)
@@ -5449,7 +5449,9 @@ enum {
 #define   CURSOR_POS_SIGN       0x8000
 #define   CURSOR_X_SHIFT        0
 #define   CURSOR_Y_SHIFT        16
-#define CURSIZE                        _MMIO(0x700a0)
+#define CURSIZE                        _MMIO(0x700a0) /* 845/865 */
+#define _CUR_FBC_CTL_A         0x700a0 /* ivb+ */
+#define   CUR_FBC_CTL_EN       (1 << 31)
 #define _CURBCNTR              0x700c0
 #define _CURBBASE              0x700c4
 #define _CURBPOS               0x700c8
@@ -5465,6 +5467,7 @@ enum {
 #define CURCNTR(pipe) _CURSOR2(pipe, _CURACNTR)
 #define CURBASE(pipe) _CURSOR2(pipe, _CURABASE)
 #define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS)
+#define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A)
 
 #define CURSOR_A_OFFSET 0x70080
 #define CURSOR_B_OFFSET 0x700c0
@@ -5497,8 +5500,7 @@ enum {
 #define   DISPPLANE_PIPE_CSC_ENABLE            (1<<24)
 #define   DISPPLANE_SEL_PIPE_SHIFT             24
 #define   DISPPLANE_SEL_PIPE_MASK              (3<<DISPPLANE_SEL_PIPE_SHIFT)
-#define   DISPPLANE_SEL_PIPE_A                 0
-#define   DISPPLANE_SEL_PIPE_B                 (1<<DISPPLANE_SEL_PIPE_SHIFT)
+#define   DISPPLANE_SEL_PIPE(pipe)             ((pipe)<<DISPPLANE_SEL_PIPE_SHIFT)
 #define   DISPPLANE_SRC_KEY_ENABLE             (1<<22)
 #define   DISPPLANE_SRC_KEY_DISABLE            0
 #define   DISPPLANE_LINE_DOUBLE                        (1<<20)
@@ -8276,7 +8278,7 @@ enum {
 
 /* MIPI DSI registers */
 
-#define _MIPI_PORT(port, a, c) ((port) ? c : a)        /* ports A and C only */
+#define _MIPI_PORT(port, a, c) (((port) == PORT_A) ? a : c)    /* ports A and C only */
 #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c))
 
 #define MIPIO_TXESC_CLK_DIV1                   _MMIO(0x160004)
index a277f8eb7beb8b5e12d5919275b63163a7ab2c5f..474d23c0c0cebaf7524b16631b7b64c92df50729 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/reservation.h>
 
 #include "i915_sw_fence.h"
+#include "i915_selftest.h"
 
 #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */
 
@@ -120,34 +121,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
 }
 #endif
 
-static void i915_sw_fence_release(struct kref *kref)
-{
-       struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
-
-       WARN_ON(atomic_read(&fence->pending) > 0);
-       debug_fence_destroy(fence);
-
-       if (fence->flags & I915_SW_FENCE_MASK) {
-               __i915_sw_fence_notify(fence, FENCE_FREE);
-       } else {
-               i915_sw_fence_fini(fence);
-               kfree(fence);
-       }
-}
-
-static void i915_sw_fence_put(struct i915_sw_fence *fence)
-{
-       debug_fence_assert(fence);
-       kref_put(&fence->kref, i915_sw_fence_release);
-}
-
-static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
-{
-       debug_fence_assert(fence);
-       kref_get(&fence->kref);
-       return fence;
-}
-
 static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
                                        struct list_head *continuation)
 {
@@ -202,13 +175,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
 
        debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY);
 
-       if (fence->flags & I915_SW_FENCE_MASK &&
-           __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
+       if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
                return;
 
        debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE);
 
        __i915_sw_fence_wake_up_all(fence, continuation);
+
+       debug_fence_destroy(fence);
+       __i915_sw_fence_notify(fence, FENCE_FREE);
 }
 
 static void i915_sw_fence_complete(struct i915_sw_fence *fence)
@@ -232,33 +207,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
                          const char *name,
                          struct lock_class_key *key)
 {
-       BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+       BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
 
        debug_fence_init(fence);
 
        __init_waitqueue_head(&fence->wait, name, key);
-       kref_init(&fence->kref);
        atomic_set(&fence->pending, 1);
        fence->flags = (unsigned long)fn;
 }
 
-static void __i915_sw_fence_commit(struct i915_sw_fence *fence)
-{
-       i915_sw_fence_complete(fence);
-       i915_sw_fence_put(fence);
-}
-
 void i915_sw_fence_commit(struct i915_sw_fence *fence)
 {
        debug_fence_activate(fence);
-       __i915_sw_fence_commit(fence);
+       i915_sw_fence_complete(fence);
 }
 
 static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
 {
        list_del(&wq->task_list);
        __i915_sw_fence_complete(wq->private, key);
-       i915_sw_fence_put(wq->private);
+
        if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
                kfree(wq);
        return 0;
@@ -307,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
        unsigned long flags;
        bool err;
 
-       if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+       if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
                return false;
 
        spin_lock_irqsave(&i915_sw_fence_lock, flags);
@@ -353,7 +321,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
        INIT_LIST_HEAD(&wq->task_list);
        wq->flags = pending;
        wq->func = i915_sw_fence_wake;
-       wq->private = i915_sw_fence_get(fence);
+       wq->private = fence;
 
        i915_sw_fence_await(fence);
 
@@ -402,7 +370,7 @@ static void timer_i915_sw_fence_wake(unsigned long data)
        dma_fence_put(cb->dma);
        cb->dma = NULL;
 
-       __i915_sw_fence_commit(cb->fence);
+       i915_sw_fence_complete(cb->fence);
        cb->timer.function = NULL;
 }
 
@@ -413,7 +381,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
 
        del_timer_sync(&cb->timer);
        if (cb->timer.function)
-               __i915_sw_fence_commit(cb->fence);
+               i915_sw_fence_complete(cb->fence);
        dma_fence_put(cb->dma);
 
        kfree(cb);
@@ -440,7 +408,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
                return dma_fence_wait(dma, false);
        }
 
-       cb->fence = i915_sw_fence_get(fence);
+       cb->fence = fence;
        i915_sw_fence_await(fence);
 
        cb->dma = NULL;
@@ -523,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 
        return ret;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_sw_fence.c"
+#endif
index d31cefbbcc0433528479ced3e987d890c11e57eb..1d3b6051daaf5af62b732744c0ccc4f540ad1c41 100644 (file)
@@ -23,7 +23,6 @@ struct reservation_object;
 struct i915_sw_fence {
        wait_queue_head_t wait;
        unsigned long flags;
-       struct kref kref;
        atomic_t pending;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c
new file mode 100644 (file)
index 0000000..0087acf
--- /dev/null
@@ -0,0 +1,412 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "i915_syncmap.h"
+
+#include "i915_gem.h" /* GEM_BUG_ON() */
+#include "i915_selftest.h"
+
+#define SHIFT ilog2(KSYNCMAP)
+#define MASK (KSYNCMAP - 1)
+
+/*
+ * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
+ * context id to the last u32 fence seqno waited upon from that context.
+ * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
+ * the root. This allows us to access the whole tree via a single pointer
+ * to the most recently used layer. We expect fence contexts to be dense
+ * and most reuse to be on the same i915_gem_context but on neighbouring
+ * engines (i.e. on adjacent contexts), so successive lookups tend to hit
+ * the same leaf, making it a very effective lookup cache. If the new
+ * lookup is not on the same leaf, we expect it to be on the neighbouring
+ * branch.
+ *
+ * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
+ * allows us to store whether a particular seqno is valid (i.e. allows us
+ * to distinguish unset from 0).
+ *
+ * A branch holds an array of layer pointers, and has height > 0, and always
+ * has at least 2 layers (either branches or leaves) below it.
+ *
+ * For example,
+ *     for x in
+ *       0 1 2 0x10 0x11 0x200 0x201
+ *       0x500000 0x500001 0x503000 0x503001
+ *       0xE<<60:
+ *             i915_syncmap_set(&sync, x, lower_32_bits(x));
+ * will build a tree like:
+ *     0xXXXXXXXXXXXXXXXX
+ *     0-> 0x0000000000XXXXXX
+ *     |   0-> 0x0000000000000XXX
+ *     |   |   0-> 0x00000000000000XX
+ *     |   |   |   0-> 0x000000000000000X 0:0, 1:1, 2:2
+ *     |   |   |   1-> 0x000000000000001X 0:10, 1:11
+ *     |   |   2-> 0x000000000000020X 0:200, 1:201
+ *     |   5-> 0x000000000050XXXX
+ *     |       0-> 0x000000000050000X 0:500000, 1:500001
+ *     |       3-> 0x000000000050300X 0:503000, 1:503001
+ *     e-> 0xe00000000000000X e:e
+ */
+
+struct i915_syncmap {
+       u64 prefix;
+       unsigned int height;
+       unsigned int bitmap;
+       struct i915_syncmap *parent;
+       /*
+        * Following this header is an array of either seqno or child pointers:
+        * union {
+        *      u32 seqno[KSYNCMAP];
+        *      struct i915_syncmap *child[KSYNCMAP];
+        * };
+        */
+};
+
+/**
+ * i915_syncmap_init -- initialise the #i915_syncmap
+ * @root - pointer to the #i915_syncmap
+ */
+void i915_syncmap_init(struct i915_syncmap **root)
+{
+       BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
+       BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
+       BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap));
+       *root = NULL;
+}
+
+static inline u32 *__sync_seqno(struct i915_syncmap *p)
+{
+       GEM_BUG_ON(p->height);
+       return (u32 *)(p + 1);
+}
+
+static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
+{
+       GEM_BUG_ON(!p->height);
+       return (struct i915_syncmap **)(p + 1);
+}
+
+static inline unsigned int
+__sync_branch_idx(const struct i915_syncmap *p, u64 id)
+{
+       return (id >> p->height) & MASK;
+}
+
+static inline unsigned int
+__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
+{
+       GEM_BUG_ON(p->height);
+       return id & MASK;
+}
+
+static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
+{
+       return id >> p->height >> SHIFT;
+}
+
+static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
+{
+       GEM_BUG_ON(p->height);
+       return id >> SHIFT;
+}
+
+static inline bool seqno_later(u32 a, u32 b)
+{
+       return (s32)(a - b) >= 0;
+}
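+
+/*
+ * The signed-difference compare above is wraparound-safe: for example
+ * seqno_later(1, 0xfffffffe) is true, since (s32)(1 - 0xfffffffe) == 3 >= 0,
+ * so a seqno taken just after the u32 counter wraps still ranks as later.
+ */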
+
+/**
+ * i915_syncmap_is_later -- compare against the last known sync point
+ * @root - pointer to the #i915_syncmap
+ * @id - the context id (other timeline) we are synchronising to
+ * @seqno - the sequence number along the other timeline
+ *
+ * If we have already synchronised this @root timeline with another (@id) then
+ * we can omit any repeated or earlier synchronisation requests. If the two
+ * timelines are already coupled, we can also omit the dependency between the
+ * two as that is already known via the timeline.
+ *
+ * Returns true if the two timelines are already synchronised wrt @seqno,
+ * false if not and the synchronisation must be emitted.
+ */
+bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+       struct i915_syncmap *p;
+       unsigned int idx;
+
+       p = *root;
+       if (!p)
+               return false;
+
+       if (likely(__sync_leaf_prefix(p, id) == p->prefix))
+               goto found;
+
+       /* First climb the tree back to a parent branch */
+       do {
+               p = p->parent;
+               if (!p)
+                       return false;
+
+               if (__sync_branch_prefix(p, id) == p->prefix)
+                       break;
+       } while (1);
+
+       /* And then descend again until we find our leaf */
+       do {
+               if (!p->height)
+                       break;
+
+               p = __sync_child(p)[__sync_branch_idx(p, id)];
+               if (!p)
+                       return false;
+
+               if (__sync_branch_prefix(p, id) != p->prefix)
+                       return false;
+       } while (1);
+
+       *root = p;
+found:
+       idx = __sync_leaf_idx(p, id);
+       if (!(p->bitmap & BIT(idx)))
+               return false;
+
+       return seqno_later(__sync_seqno(p)[idx], seqno);
+}
+
+static struct i915_syncmap *
+__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
+{
+       struct i915_syncmap *p;
+
+       p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
+       if (unlikely(!p))
+               return NULL;
+
+       p->parent = parent;
+       p->height = 0;
+       p->bitmap = 0;
+       p->prefix = __sync_leaf_prefix(p, id);
+       return p;
+}
+
+static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
+{
+       unsigned int idx = __sync_leaf_idx(p, id);
+
+       p->bitmap |= BIT(idx);
+       __sync_seqno(p)[idx] = seqno;
+}
+
+static inline void __sync_set_child(struct i915_syncmap *p,
+                                   unsigned int idx,
+                                   struct i915_syncmap *child)
+{
+       p->bitmap |= BIT(idx);
+       __sync_child(p)[idx] = child;
+}
+
+static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+       struct i915_syncmap *p = *root;
+       unsigned int idx;
+
+       if (!p) {
+               p = __sync_alloc_leaf(NULL, id);
+               if (unlikely(!p))
+                       return -ENOMEM;
+
+               goto found;
+       }
+
+       /* Caller handled the likely cached case */
+       GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
+
+       /* Climb back up the tree until we find a common prefix */
+       do {
+               if (!p->parent)
+                       break;
+
+               p = p->parent;
+
+               if (__sync_branch_prefix(p, id) == p->prefix)
+                       break;
+       } while (1);
+
+       /*
+        * No shortcut, we have to descend the tree to find the right layer
+        * containing this fence.
+        *
+        * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
+        * or lower layers. Leaf nodes (height = 0) contain the fences, all
+        * other nodes (height > 0) are internal layers that point to a lower
+        * node. Each internal layer has at least 2 descendants.
+        *
+        * Starting at the top, we check whether the current prefix matches. If
+        * it doesn't, we have gone past our target and need to insert a join
+        * into the tree, and a new leaf node for the target as a descendant
+        * of the join, as well as the original layer.
+        *
+        * The matching prefix means we are still following the right branch
+        * of the tree. If it has height 0, we have found our leaf and just
+        * need to replace the fence slot with ourselves. If the height is
+        * not zero, our slot contains the next layer in the tree (unless
+        * it is empty, in which case we can add ourselves as a new leaf).
+        * As we descend the tree, the prefix grows (and height decreases).
+        */
+       do {
+               struct i915_syncmap *next;
+
+               if (__sync_branch_prefix(p, id) != p->prefix) {
+                       unsigned int above;
+
+                       /* Insert a join above the current layer */
+                       next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
+                                      GFP_KERNEL);
+                       if (unlikely(!next))
+                               return -ENOMEM;
+
+                       /* Compute the height at which these two diverge */
+                       above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
+                       above = round_up(above, SHIFT);
+                       next->height = above + p->height;
+                       next->prefix = __sync_branch_prefix(next, id);
+
+                       /* Insert the join into the parent */
+                       if (p->parent) {
+                               idx = __sync_branch_idx(p->parent, id);
+                               __sync_child(p->parent)[idx] = next;
+                               GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
+                       }
+                       next->parent = p->parent;
+
+                       /* Compute the idx of the other branch, not our id! */
+                       idx = p->prefix >> (above - SHIFT) & MASK;
+                       __sync_set_child(next, idx, p);
+                       p->parent = next;
+
+                       /* Ascend to the join */
+                       p = next;
+               } else {
+                       if (!p->height)
+                               break;
+               }
+
+               /* Descend into the next layer */
+               GEM_BUG_ON(!p->height);
+               idx = __sync_branch_idx(p, id);
+               next = __sync_child(p)[idx];
+               if (!next) {
+                       next = __sync_alloc_leaf(p, id);
+                       if (unlikely(!next))
+                               return -ENOMEM;
+
+                       __sync_set_child(p, idx, next);
+                       p = next;
+                       break;
+               }
+
+               p = next;
+       } while (1);
+
+found:
+       GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
+       __sync_set_seqno(p, id, seqno);
+       *root = p;
+       return 0;
+}
+
+/**
+ * i915_syncmap_set -- mark the most recent syncpoint between contexts
+ * @root - pointer to the #i915_syncmap
+ * @id - the context id (other timeline) we have synchronised to
+ * @seqno - the sequence number along the other timeline
+ *
+ * When we synchronise this @root timeline with another (@id), we also know
+ * that we have synchronised with all previous seqnos along that timeline. If
+ * we then have a request to synchronise with the same seqno or older, we can
+ * omit it; see i915_syncmap_is_later().
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+       struct i915_syncmap *p = *root;
+
+       /*
+        * We expect to be called in sequence following is_later(id), which
+        * should have preloaded the root for us.
+        */
+       if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
+               __sync_set_seqno(p, id, seqno);
+               return 0;
+       }
+
+       return __sync_set(root, id, seqno);
+}
+
+static void __sync_free(struct i915_syncmap *p)
+{
+       if (p->height) {
+               unsigned int i;
+
+               while ((i = ffs(p->bitmap))) {
+                       p->bitmap &= ~0u << i;
+                       __sync_free(__sync_child(p)[i - 1]);
+               }
+       }
+
+       kfree(p);
+}
+
+/**
+ * i915_syncmap_free -- free all memory associated with the syncmap
+ * @root - pointer to the #i915_syncmap
+ *
+ * Either when the timeline is to be freed and we no longer need the sync
+ * point tracking, or when the fences are all known to be signaled and the
+ * sync point tracking is redundant, we can free the #i915_syncmap to recover
+ * its allocations.
+ *
+ * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
+ * reuse.
+ */
+void i915_syncmap_free(struct i915_syncmap **root)
+{
+       struct i915_syncmap *p;
+
+       p = *root;
+       if (!p)
+               return;
+
+       while (p->parent)
+               p = p->parent;
+
+       __sync_free(p);
+       *root = NULL;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_syncmap.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_syncmap.h b/drivers/gpu/drm/i915/i915_syncmap.h
new file mode 100644 (file)
index 0000000..0653f70
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_SYNCMAP_H__
+#define __I915_SYNCMAP_H__
+
+#include <linux/types.h>
+
+struct i915_syncmap;
+#define KSYNCMAP 16 /* radix of the tree, how many slots in each layer */
+
+void i915_syncmap_init(struct i915_syncmap **root);
+int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno);
+bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno);
+void i915_syncmap_free(struct i915_syncmap **root);
+
+#endif /* __I915_SYNCMAP_H__ */
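/*
 * Minimal usage sketch for the API above (hypothetical caller, not part of
 * this patch): a timeline keeps one i915_syncmap pointer, asks is_later()
 * before emitting a sync against another fence context, and records the new
 * sync point with set().
 */
static struct i915_syncmap *timeline_sync;	/* assumed per-timeline state */

static void timeline_sync_setup(void)
{
	i915_syncmap_init(&timeline_sync);
}

static bool timeline_needs_sync(u64 other_context_id, u32 other_seqno)
{
	/* Already waited for an equal or later seqno from that context? */
	if (i915_syncmap_is_later(&timeline_sync, other_context_id, other_seqno))
		return false;

	/* Best effort: on -ENOMEM we would simply emit a redundant sync. */
	i915_syncmap_set(&timeline_sync, other_context_id, other_seqno);
	return true;
}

static void timeline_sync_teardown(void)
{
	i915_syncmap_free(&timeline_sync);
}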
index f3fdfda5e5588d8a040eebb570f32bd044cb9c77..1eef3fae4db31312f788372d72470ad2e9b38f5c 100644 (file)
@@ -181,13 +181,10 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
        struct drm_device *dev = &dev_priv->drm;
        struct i915_gem_context *ctx;
-       u32 *temp = NULL; /* Just here to make handling failures easy */
        int slice = (int)(uintptr_t)attr->private;
+       u32 **remap_info;
        int ret;
 
-       if (!HAS_HW_CONTEXTS(dev_priv))
-               return -ENXIO;
-
        ret = l3_access_valid(dev_priv, offset);
        if (ret)
                return ret;
@@ -196,11 +193,12 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
        if (ret)
                return ret;
 
-       if (!dev_priv->l3_parity.remap_info[slice]) {
-               temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
-               if (!temp) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return -ENOMEM;
+       remap_info = &dev_priv->l3_parity.remap_info[slice];
+       if (!*remap_info) {
+               *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
+               if (!*remap_info) {
+                       ret = -ENOMEM;
+                       goto out;
                }
        }
 
@@ -208,18 +206,18 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
         * aren't propagated. Since I cannot find a stable way to reset the GPU
         * at this point it is left as a TODO.
        */
-       if (temp)
-               dev_priv->l3_parity.remap_info[slice] = temp;
-
-       memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
+       memcpy(*remap_info + (offset/4), buf, count);
 
        /* NB: We defer the remapping until we switch to the context */
        list_for_each_entry(ctx, &dev_priv->context_list, link)
                ctx->remap_slice |= (1<<slice);
 
+       ret = count;
+
+out:
        mutex_unlock(&dev->struct_mutex);
 
-       return count;
+       return ret;
 }
 
 static struct bin_attribute dpf_attrs = {
index 66404c5aee82e501a263c52098500abd28304d9a..b24a83d435592bde89236f12c218f747d1632e04 100644 (file)
@@ -89,6 +89,55 @@ TRACE_EVENT(intel_memory_cxsr,
                      __entry->frame[PIPE_C], __entry->scanline[PIPE_C])
 );
 
+TRACE_EVENT(g4x_wm,
+           TP_PROTO(struct intel_crtc *crtc, const struct g4x_wm_values *wm),
+           TP_ARGS(crtc, wm),
+
+           TP_STRUCT__entry(
+                            __field(enum pipe, pipe)
+                            __field(u32, frame)
+                            __field(u32, scanline)
+                            __field(u16, primary)
+                            __field(u16, sprite)
+                            __field(u16, cursor)
+                            __field(u16, sr_plane)
+                            __field(u16, sr_cursor)
+                            __field(u16, sr_fbc)
+                            __field(u16, hpll_plane)
+                            __field(u16, hpll_cursor)
+                            __field(u16, hpll_fbc)
+                            __field(bool, cxsr)
+                            __field(bool, hpll)
+                            __field(bool, fbc)
+                            ),
+
+           TP_fast_assign(
+                          __entry->pipe = crtc->pipe;
+                          __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev,
+                                                                                      crtc->pipe);
+                          __entry->scanline = intel_get_crtc_scanline(crtc);
+                          __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY];
+                          __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0];
+                          __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR];
+                          __entry->sr_plane = wm->sr.plane;
+                          __entry->sr_cursor = wm->sr.cursor;
+                          __entry->sr_fbc = wm->sr.fbc;
+                          __entry->hpll_plane = wm->hpll.plane;
+                          __entry->hpll_cursor = wm->hpll.cursor;
+                          __entry->hpll_fbc = wm->hpll.fbc;
+                          __entry->cxsr = wm->cxsr;
+                          __entry->hpll = wm->hpll_en;
+                          __entry->fbc = wm->fbc_en;
+                          ),
+
+           TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
+                     pipe_name(__entry->pipe), __entry->frame, __entry->scanline,
+                     __entry->primary, __entry->sprite, __entry->cursor,
+                     yesno(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc,
+                     yesno(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc,
+                     yesno(__entry->fbc))
+);
+
 TRACE_EVENT(vlv_wm,
            TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm),
            TP_ARGS(crtc, wm),
index c5455d36b6172bab6732092c232607c87672c59b..16ecd1ab108d48f6d7b036801a32f40c2e9ccf5d 100644 (file)
 #define overflows_type(x, T) \
        (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
 
-#define ptr_mask_bits(ptr) ({                                          \
+#define ptr_mask_bits(ptr, n) ({                                       \
        unsigned long __v = (unsigned long)(ptr);                       \
-       (typeof(ptr))(__v & PAGE_MASK);                                 \
+       (typeof(ptr))(__v & -BIT(n));                                   \
 })
 
-#define ptr_unpack_bits(ptr, bits) ({                                  \
+#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
+
+#define ptr_unpack_bits(ptr, bits, n) ({                               \
        unsigned long __v = (unsigned long)(ptr);                       \
-       (bits) = __v & ~PAGE_MASK;                                      \
-       (typeof(ptr))(__v & PAGE_MASK);                                 \
+       *(bits) = __v & (BIT(n) - 1);                                   \
+       (typeof(ptr))(__v & -BIT(n));                                   \
 })
 
-#define ptr_pack_bits(ptr, bits)                                       \
+#define ptr_pack_bits(ptr, bits, n)                                    \
        ((typeof(ptr))((unsigned long)(ptr) | (bits)))
 
+#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
+#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
+#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
+#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
+
 #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
 
 #define fetch_and_zero(ptr) ({                                         \
        __T;                                                            \
 })
 
+#define __mask_next_bit(mask) ({                                       \
+       int __idx = ffs(mask) - 1;                                      \
+       mask &= ~BIT(__idx);                                            \
+       __idx;                                                          \
+})
+
+#include <linux/list.h>
+
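+/*
+ * The helper below detaches every entry between @head and @first at once:
+ * only head->next and first->prev are rewritten, so the unlinked entries
+ * keep their now-stale links and the caller must already be done with them.
+ */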
+static inline void __list_del_many(struct list_head *head,
+                                  struct list_head *first)
+{
+       first->prev = head;
+       WRITE_ONCE(head->next, first);
+}
+
 #endif /* !__I915_UTILS_H */
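/*
 * Illustrative sketch of the new helpers above (hypothetical types and
 * values, not part of this patch): packing a small tag into the low bits of
 * an aligned pointer and walking a bitmask with __mask_next_bit().
 */
struct tagged_obj {
	int payload;
};

static void utils_example(struct tagged_obj *obj)
{
	struct tagged_obj *packed, *clean;
	unsigned long tag;
	unsigned int mask = 0xb;

	packed = ptr_pack_bits(obj, 2, 2);	  /* tag value 2 in bits [1:0] */
	clean = ptr_unpack_bits(packed, &tag, 2); /* clean == obj, tag == 2 */

	while (mask) {
		unsigned int idx = __mask_next_bit(mask); /* 0, then 1, then 3 */
		(void)idx;			  /* ... process idx ... */
	}

	(void)clean;
	(void)tag;
}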
index a40c82c654506a58f4205bd6c0ef5490faf2baeb..4325cb0a04f5db5e3cdf37e70330283309d93d06 100644 (file)
@@ -102,23 +102,7 @@ void
 intel_plane_destroy_state(struct drm_plane *plane,
                          struct drm_plane_state *state)
 {
-       struct i915_vma *vma;
-
-       vma = fetch_and_zero(&to_intel_plane_state(state)->vma);
-
-       /*
-        * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma.
-        * We currently don't clear all planes during driver unload, so we have
-        * to be able to unpin vma here for now.
-        *
-        * Normally this can only happen during unload when kmscon is disabled
-        * and userspace doesn't attempt to set a framebuffer at all.
-        */
-       if (vma) {
-               mutex_lock(&plane->dev->struct_mutex);
-               intel_unpin_fb_vma(vma);
-               mutex_unlock(&plane->dev->struct_mutex);
-       }
+       WARN_ON(to_intel_plane_state(state)->vma);
 
        drm_atomic_helper_plane_destroy_state(plane, state);
 }
@@ -185,7 +169,7 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state,
        }
 
        intel_state->base.visible = false;
-       ret = intel_plane->check_plane(plane, crtc_state, intel_state);
+       ret = intel_plane->check_plane(intel_plane, crtc_state, intel_state);
        if (ret)
                return ret;
 
@@ -235,14 +219,14 @@ static void intel_plane_atomic_update(struct drm_plane *plane,
                trace_intel_update_plane(plane,
                                         to_intel_crtc(crtc));
 
-               intel_plane->update_plane(plane,
+               intel_plane->update_plane(intel_plane,
                                          to_intel_crtc_state(crtc->state),
                                          intel_state);
        } else {
                trace_intel_disable_plane(plane,
                                          to_intel_crtc(crtc));
 
-               intel_plane->disable_plane(plane, crtc);
+               intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
        }
 }
 
index 52c207e81f413a4465bec60098fedf09efea59cb..d805b6e6fe71467ed4f9a85a74fdba1b5ed81998 100644 (file)
@@ -632,20 +632,9 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder,
                                                 (int) port, (int) pipe);
        }
 
-       switch (intel_encoder->type) {
-       case INTEL_OUTPUT_HDMI:
-               intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
-                                      crtc_state->port_clock,
-                                      false, 0);
-               break;
-       case INTEL_OUTPUT_DP:
-               intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
-                                      adjusted_mode->crtc_clock,
-                                      true, crtc_state->port_clock);
-               break;
-       default:
-               break;
-       }
+       intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld,
+                              crtc_state->port_clock,
+                              intel_encoder->type == INTEL_OUTPUT_DP);
 }
 
 /**
@@ -680,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder)
                                                 (int) port, (int) pipe);
        }
 
-       intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0);
+       intel_lpe_audio_notify(dev_priv, pipe, port, NULL, 0, false);
 }
 
 /**
index 9ccbf26124c6169d2e35a6e915c3da09845ba269..183afcb036aa915ecde8b33ce7b83195f9c9c519 100644 (file)
@@ -64,10 +64,12 @@ static unsigned long wait_timeout(void)
 
 static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
 {
-       DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n",
+       DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n",
                         engine->name, __builtin_return_address(0),
                         yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-                                       &engine->irq_posted)));
+                                       &engine->irq_posted)),
+                        intel_engine_get_seqno(engine),
+                        intel_engine_last_submit(engine));
 
        set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 }
@@ -665,12 +667,13 @@ static int intel_breadcrumbs_signaler(void *arg)
        return 0;
 }
 
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
+                                  bool wakeup)
 {
        struct intel_engine_cs *engine = request->engine;
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
        struct rb_node *parent, **p;
-       bool first, wakeup;
+       bool first;
        u32 seqno;
 
        /* Note that we may be called from an interrupt handler on another
@@ -703,7 +706,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
         * If we are the oldest waiter, enable the irq (after which we
         * must double check that the seqno did not complete).
         */
-       wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
+       wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
 
        /* Now insert ourselves into the retirement ordered list of signals
         * on this engine. We track the oldest seqno as that will be the
index dd3ad52b7dfeac5e7d6f8eb59ee18b372f20a77f..29792972d55db9849430aaf7b4d4127c9afd29ce 100644 (file)
@@ -1071,9 +1071,15 @@ static int bxt_calc_cdclk(int max_pixclk)
 
 static int glk_calc_cdclk(int max_pixclk)
 {
-       if (max_pixclk > 2 * 158400)
+       /*
+        * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk
+        * as a temporary workaround. Use a higher cdclk instead. (Note that
+        * intel_compute_max_dotclk() limits the max pixel clock to 99% of max
+        * cdclk.)
+        */
+       if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100))
                return 316800;
-       else if (max_pixclk > 2 * 79200)
+       else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100))
                return 158400;
        else
                return 79200;
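/*
 * Worked out, the thresholds above are: max_pixclk > 313632 selects 316800,
 * max_pixclk > 156816 selects 158400, otherwise 79200, since
 * DIV_ROUND_UP(2 * 158400 * 99, 100) == 313632 and
 * DIV_ROUND_UP(2 * 79200 * 99, 100) == 156816.
 */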
@@ -1664,7 +1670,11 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
        int max_cdclk_freq = dev_priv->max_cdclk_freq;
 
        if (IS_GEMINILAKE(dev_priv))
-               return 2 * max_cdclk_freq;
+               /*
+                * FIXME: Limiting to 99% as a temporary workaround. See
+                * glk_calc_cdclk() for details.
+                */
+               return 2 * max_cdclk_freq * 99 / 100;
        else if (INTEL_INFO(dev_priv)->gen >= 9 ||
                 IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
                return max_cdclk_freq;
@@ -1798,13 +1808,11 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv)
        case CLKCFG_FSB_800:
                return 200000;
        case CLKCFG_FSB_1067:
+       case CLKCFG_FSB_1067_ALT:
                return 266667;
        case CLKCFG_FSB_1333:
+       case CLKCFG_FSB_1333_ALT:
                return 333333;
-       /* these two are just a guess; one of them might be right */
-       case CLKCFG_FSB_1600:
-       case CLKCFG_FSB_1600_ALT:
-               return 400000;
        default:
                return 133333;
        }
index 2797bf37c3ac0c146819d9fedeaf3611dcc4162f..84a1f5e85153fe1af4f7cfde8a5e7bf1a00201d5 100644 (file)
@@ -777,13 +777,6 @@ out:
        return ret;
 }
 
-static int intel_crt_set_property(struct drm_connector *connector,
-                                 struct drm_property *property,
-                                 uint64_t value)
-{
-       return 0;
-}
-
 void intel_crt_reset(struct drm_encoder *encoder)
 {
        struct drm_i915_private *dev_priv = to_i915(encoder->dev);
@@ -814,10 +807,9 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = {
        .late_register = intel_connector_register,
        .early_unregister = intel_connector_unregister,
        .destroy = intel_crt_destroy,
-       .set_property = intel_crt_set_property,
+       .set_property = drm_atomic_helper_connector_set_property,
        .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
        .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-       .atomic_get_property = intel_connector_atomic_get_property,
 };
 
 static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = {
index 7d01dfe7faacecf229f526fd748bd82b19c41c1c..3718341662c23207f70ede384602c3b72dc232e8 100644 (file)
@@ -337,7 +337,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
        } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
                for_each_pipe(dev_priv, pipe)
                        info->num_sprites[pipe] = 2;
-       } else if (INTEL_GEN(dev_priv) >= 5) {
+       } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
                for_each_pipe(dev_priv, pipe)
                        info->num_sprites[pipe] = 1;
        }
index 6a037b856d9672310a12f8652d4ed608fd93d59b..7fa21df5bcd78334ff507a305b5325d81469d91d 100644 (file)
@@ -1277,7 +1277,7 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv,
                I915_STATE_WARN(val & SPRITE_ENABLE,
                     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
                     plane_name(pipe), pipe_name(pipe));
-       } else if (INTEL_GEN(dev_priv) >= 5) {
+       } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
                u32 val = I915_READ(DVSCNTR(pipe));
                I915_STATE_WARN(val & DVS_ENABLE,
                     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
@@ -2084,6 +2084,18 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view,
        }
 }
 
+static unsigned int intel_cursor_alignment(const struct drm_i915_private *dev_priv)
+{
+       if (IS_I830(dev_priv))
+               return 16 * 1024;
+       else if (IS_I85X(dev_priv))
+               return 256;
+       else if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
+               return 32;
+       else
+               return 4 * 1024;
+}
+
 static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
 {
        if (INTEL_INFO(dev_priv)->gen >= 9)
@@ -2386,11 +2398,17 @@ u32 intel_compute_tile_offset(int *x, int *y,
                              const struct intel_plane_state *state,
                              int plane)
 {
-       const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
+       struct intel_plane *intel_plane = to_intel_plane(state->base.plane);
+       struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev);
        const struct drm_framebuffer *fb = state->base.fb;
        unsigned int rotation = state->base.rotation;
        int pitch = intel_fb_pitch(fb, plane, rotation);
-       u32 alignment = intel_surf_alignment(fb, plane);
+       u32 alignment;
+
+       if (intel_plane->id == PLANE_CURSOR)
+               alignment = intel_cursor_alignment(dev_priv);
+       else
+               alignment = intel_surf_alignment(fb, plane);
 
        return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch,
                                          rotation, alignment);
@@ -2750,7 +2768,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
                                false);
        intel_pre_disable_primary_noatomic(&intel_crtc->base);
        trace_intel_disable_plane(primary, intel_crtc);
-       intel_plane->disable_plane(primary, &intel_crtc->base);
+       intel_plane->disable_plane(intel_plane, intel_crtc);
 
        return;
 
@@ -2981,10 +2999,8 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
                dspcntr |= DISPPLANE_PIPE_CSC_ENABLE;
 
-       if (INTEL_GEN(dev_priv) < 4) {
-               if (crtc->pipe == PIPE_B)
-                       dspcntr |= DISPPLANE_SEL_PIPE_B;
-       }
+       if (INTEL_GEN(dev_priv) < 4)
+               dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe);
 
        switch (fb->format->format) {
        case DRM_FORMAT_C8:
@@ -3063,14 +3079,14 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
        return 0;
 }
 
-static void i9xx_update_primary_plane(struct drm_plane *primary,
+static void i9xx_update_primary_plane(struct intel_plane *primary,
                                      const struct intel_crtc_state *crtc_state,
                                      const struct intel_plane_state *plane_state)
 {
-       struct drm_i915_private *dev_priv = to_i915(primary->dev);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-       struct drm_framebuffer *fb = plane_state->base.fb;
-       int plane = intel_crtc->plane;
+       struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
+       struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       enum plane plane = primary->plane;
        u32 linear_offset;
        u32 dspcntr = plane_state->ctl;
        i915_reg_t reg = DSPCNTR(plane);
@@ -3081,12 +3097,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
        linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 
        if (INTEL_GEN(dev_priv) >= 4)
-               intel_crtc->dspaddr_offset = plane_state->main.offset;
+               crtc->dspaddr_offset = plane_state->main.offset;
        else
-               intel_crtc->dspaddr_offset = linear_offset;
+               crtc->dspaddr_offset = linear_offset;
 
-       intel_crtc->adjusted_x = x;
-       intel_crtc->adjusted_y = y;
+       crtc->adjusted_x = x;
+       crtc->adjusted_y = y;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
@@ -3112,31 +3128,29 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
                I915_WRITE_FW(DSPSURF(plane),
                              intel_plane_ggtt_offset(plane_state) +
-                             intel_crtc->dspaddr_offset);
+                             crtc->dspaddr_offset);
                I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x);
        } else if (INTEL_GEN(dev_priv) >= 4) {
                I915_WRITE_FW(DSPSURF(plane),
                              intel_plane_ggtt_offset(plane_state) +
-                             intel_crtc->dspaddr_offset);
+                             crtc->dspaddr_offset);
                I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x);
                I915_WRITE_FW(DSPLINOFF(plane), linear_offset);
        } else {
                I915_WRITE_FW(DSPADDR(plane),
                              intel_plane_ggtt_offset(plane_state) +
-                             intel_crtc->dspaddr_offset);
+                             crtc->dspaddr_offset);
        }
        POSTING_READ_FW(reg);
 
        spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static void i9xx_disable_primary_plane(struct drm_plane *primary,
-                                      struct drm_crtc *crtc)
+static void i9xx_disable_primary_plane(struct intel_plane *primary,
+                                      struct intel_crtc *crtc)
 {
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       int plane = intel_crtc->plane;
+       struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
+       enum plane plane = primary->plane;
        unsigned long irqflags;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -3321,16 +3335,15 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
        return plane_ctl;
 }
 
-static void skylake_update_primary_plane(struct drm_plane *plane,
+static void skylake_update_primary_plane(struct intel_plane *plane,
                                         const struct intel_crtc_state *crtc_state,
                                         const struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = plane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-       struct drm_framebuffer *fb = plane_state->base.fb;
-       enum plane_id plane_id = to_intel_plane(plane)->id;
-       enum pipe pipe = to_intel_plane(plane)->pipe;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       enum plane_id plane_id = plane->id;
+       enum pipe pipe = plane->pipe;
        u32 plane_ctl = plane_state->ctl;
        unsigned int rotation = plane_state->base.rotation;
        u32 stride = skl_plane_stride(fb, 0, rotation);
@@ -3352,10 +3365,10 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
        dst_w--;
        dst_h--;
 
-       intel_crtc->dspaddr_offset = surf_addr;
+       crtc->dspaddr_offset = surf_addr;
 
-       intel_crtc->adjusted_x = src_x;
-       intel_crtc->adjusted_y = src_y;
+       crtc->adjusted_x = src_x;
+       crtc->adjusted_y = src_y;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
@@ -3394,13 +3407,12 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
        spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static void skylake_disable_primary_plane(struct drm_plane *primary,
-                                         struct drm_crtc *crtc)
+static void skylake_disable_primary_plane(struct intel_plane *primary,
+                                         struct intel_crtc *crtc)
 {
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       enum plane_id plane_id = to_intel_plane(primary)->id;
-       enum pipe pipe = to_intel_plane(primary)->pipe;
+       struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
+       enum plane_id plane_id = primary->id;
+       enum pipe pipe = primary->pipe;
        unsigned long irqflags;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -3433,7 +3445,7 @@ static void intel_update_primary_planes(struct drm_device *dev)
                        trace_intel_update_plane(&plane->base,
                                                 to_intel_crtc(crtc));
 
-                       plane->update_plane(&plane->base,
+                       plane->update_plane(plane,
                                            to_intel_crtc_state(crtc->state),
                                            plane_state);
                }
@@ -4861,12 +4873,9 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
 {
        if (intel_crtc->overlay) {
                struct drm_device *dev = intel_crtc->base.dev;
-               struct drm_i915_private *dev_priv = to_i915(dev);
 
                mutex_lock(&dev->struct_mutex);
-               dev_priv->mm.interruptible = false;
                (void) intel_overlay_switch_off(intel_crtc->overlay);
-               dev_priv->mm.interruptible = true;
                mutex_unlock(&dev->struct_mutex);
        }
 
@@ -5086,7 +5095,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask
        intel_crtc_dpms_overlay_disable(intel_crtc);
 
        drm_for_each_plane_mask(p, dev, plane_mask)
-               to_intel_plane(p)->disable_plane(p, crtc);
+               to_intel_plane(p)->disable_plane(to_intel_plane(p), intel_crtc);
 
        /*
         * FIXME: Once we grow proper nuclear flip support out of this we need
@@ -5722,6 +5731,8 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
 static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
                             struct drm_atomic_state *old_state)
 {
+       struct intel_atomic_state *old_intel_state =
+               to_intel_atomic_state(old_state);
        struct drm_crtc *crtc = pipe_config->base.crtc;
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
@@ -5754,7 +5765,11 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
 
        intel_color_load_luts(&pipe_config->base);
 
-       intel_update_watermarks(intel_crtc);
+       if (dev_priv->display.initial_watermarks != NULL)
+               dev_priv->display.initial_watermarks(old_intel_state,
+                                                    intel_crtc->config);
+       else
+               intel_update_watermarks(intel_crtc);
        intel_enable_pipe(intel_crtc);
 
        assert_vblank_disabled(crtc);
@@ -5920,9 +5935,10 @@ void intel_encoder_destroy(struct drm_encoder *encoder)
 
 /* Cross check the actual hw state with our own modeset state tracking (and its
  * internal consistency). */
-static void intel_connector_verify_state(struct intel_connector *connector)
+static void intel_connector_verify_state(struct drm_crtc_state *crtc_state,
+                                        struct drm_connector_state *conn_state)
 {
-       struct drm_crtc *crtc = connector->base.state->crtc;
+       struct intel_connector *connector = to_intel_connector(conn_state->connector);
 
        DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
                      connector->base.base.id,
@@ -5930,15 +5946,14 @@ static void intel_connector_verify_state(struct intel_connector *connector)
 
        if (connector->get_hw_state(connector)) {
                struct intel_encoder *encoder = connector->encoder;
-               struct drm_connector_state *conn_state = connector->base.state;
 
-               I915_STATE_WARN(!crtc,
+               I915_STATE_WARN(!crtc_state,
                         "connector enabled without attached crtc\n");
 
-               if (!crtc)
+               if (!crtc_state)
                        return;
 
-               I915_STATE_WARN(!crtc->state->active,
+               I915_STATE_WARN(!crtc_state->active,
                      "connector is active, but attached crtc isn't\n");
 
                if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST)
@@ -5950,9 +5965,9 @@ static void intel_connector_verify_state(struct intel_connector *connector)
                I915_STATE_WARN(conn_state->crtc != encoder->base.crtc,
                        "attached encoder crtc differs from connector crtc\n");
        } else {
-               I915_STATE_WARN(crtc && crtc->state->active,
+               I915_STATE_WARN(crtc_state && crtc_state->active,
                        "attached crtc is active, but connector isn't\n");
-               I915_STATE_WARN(!crtc && connector->base.state->best_encoder,
+               I915_STATE_WARN(!crtc_state && conn_state->best_encoder,
                        "best encoder set without crtc!\n");
        }
 }
@@ -6372,8 +6387,8 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, enum pipe
        vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val);
 
        reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13);
-       reg_val &= 0x8cffffff;
-       reg_val = 0x8c000000;
+       reg_val &= 0x00ffffff;
+       reg_val |= 0x8c000000;
        vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val);
 
        reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1));
@@ -8177,9 +8192,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
 {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
-       struct dpll reduced_clock;
-       bool has_reduced_clock = false;
-       struct intel_shared_dpll *pll;
        const struct intel_limit *limit;
        int refclk = 120000;
 
@@ -8221,20 +8233,14 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
                return -EINVAL;
        }
 
-       ironlake_compute_dpll(crtc, crtc_state,
-                             has_reduced_clock ? &reduced_clock : NULL);
+       ironlake_compute_dpll(crtc, crtc_state, NULL);
 
-       pll = intel_get_shared_dpll(crtc, crtc_state, NULL);
-       if (pll == NULL) {
+       if (!intel_get_shared_dpll(crtc, crtc_state, NULL)) {
                DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
                                 pipe_name(crtc->pipe));
                return -EINVAL;
        }
 
-       if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) &&
-           has_reduced_clock)
-               crtc->lowfreq_avail = true;
-
        return 0;
 }
 
@@ -9138,38 +9144,171 @@ out:
        return active;
 }
 
+static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
+{
+       struct drm_i915_private *dev_priv =
+               to_i915(plane_state->base.plane->dev);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+       u32 base;
+
+       if (INTEL_INFO(dev_priv)->cursor_needs_physical)
+               base = obj->phys_handle->busaddr;
+       else
+               base = intel_plane_ggtt_offset(plane_state);
+
+       base += plane_state->main.offset;
+
+       /* ILK+ do this automagically */
+       if (HAS_GMCH_DISPLAY(dev_priv) &&
+           plane_state->base.rotation & DRM_MODE_ROTATE_180)
+               base += (plane_state->base.crtc_h *
+                        plane_state->base.crtc_w - 1) * fb->format->cpp[0];
+
+       return base;
+}
+
+static u32 intel_cursor_position(const struct intel_plane_state *plane_state)
+{
+       int x = plane_state->base.crtc_x;
+       int y = plane_state->base.crtc_y;
+       u32 pos = 0;
+
+       if (x < 0) {
+               pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
+               x = -x;
+       }
+       pos |= x << CURSOR_X_SHIFT;
+
+       if (y < 0) {
+               pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
+               y = -y;
+       }
+       pos |= y << CURSOR_Y_SHIFT;
+
+       return pos;
+}
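The new intel_cursor_position() helper above packs the cursor x/y coordinates into the sign-magnitude layout the CURPOS register expects. A standalone sketch of that encoding with a worked value (the SKETCH_* constants are illustrative; the real CURSOR_POS_SIGN/CURSOR_X_SHIFT/CURSOR_Y_SHIFT definitions live in i915_reg.h):

/* Illustrative sketch only, not part of this patch. */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_CURSOR_POS_SIGN 0x8000u /* assumed sign bit per coordinate field */
#define SKETCH_CURSOR_X_SHIFT  0
#define SKETCH_CURSOR_Y_SHIFT  16

static uint32_t sketch_cursor_pos(int x, int y)
{
        uint32_t pos = 0;

        if (x < 0) {
                pos |= SKETCH_CURSOR_POS_SIGN << SKETCH_CURSOR_X_SHIFT;
                x = -x;
        }
        pos |= (uint32_t)x << SKETCH_CURSOR_X_SHIFT;

        if (y < 0) {
                pos |= SKETCH_CURSOR_POS_SIGN << SKETCH_CURSOR_Y_SHIFT;
                y = -y;
        }
        pos |= (uint32_t)y << SKETCH_CURSOR_Y_SHIFT;

        return pos;
}

int main(void)
{
        /* x = -10, y = 20: sign bit set in the x field, magnitudes in both */
        printf("0x%08x\n", sketch_cursor_pos(-10, 20)); /* prints 0x0014800a */
        return 0;
}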
+
+static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
+{
+       const struct drm_mode_config *config =
+               &plane_state->base.plane->dev->mode_config;
+       int width = plane_state->base.crtc_w;
+       int height = plane_state->base.crtc_h;
+
+       return width > 0 && width <= config->cursor_width &&
+               height > 0 && height <= config->cursor_height;
+}
+
+static int intel_check_cursor(struct intel_crtc_state *crtc_state,
+                             struct intel_plane_state *plane_state)
+{
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       int src_x, src_y;
+       u32 offset;
+       int ret;
+
+       ret = drm_plane_helper_check_state(&plane_state->base,
+                                          &plane_state->clip,
+                                          DRM_PLANE_HELPER_NO_SCALING,
+                                          DRM_PLANE_HELPER_NO_SCALING,
+                                          true, true);
+       if (ret)
+               return ret;
+
+       if (!fb)
+               return 0;
+
+       if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
+               DRM_DEBUG_KMS("cursor cannot be tiled\n");
+               return -EINVAL;
+       }
+
+       src_x = plane_state->base.src_x >> 16;
+       src_y = plane_state->base.src_y >> 16;
+
+       intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
+       offset = intel_compute_tile_offset(&src_x, &src_y, plane_state, 0);
+
+       if (src_x != 0 || src_y != 0) {
+               DRM_DEBUG_KMS("Arbitrary cursor panning not supported\n");
+               return -EINVAL;
+       }
+
+       plane_state->main.offset = offset;
+
+       return 0;
+}
+
 static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state,
                           const struct intel_plane_state *plane_state)
 {
-       unsigned int width = plane_state->base.crtc_w;
-       unsigned int stride = roundup_pow_of_two(width) * 4;
+       const struct drm_framebuffer *fb = plane_state->base.fb;
 
-       switch (stride) {
-       default:
-               WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n",
-                         width, stride);
-               stride = 256;
-               /* fallthrough */
+       return CURSOR_ENABLE |
+               CURSOR_GAMMA_ENABLE |
+               CURSOR_FORMAT_ARGB |
+               CURSOR_STRIDE(fb->pitches[0]);
+}
+
+static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state)
+{
+       int width = plane_state->base.crtc_w;
+
+       /*
+        * 845g/865g are only limited by the width of their cursors,
+        * the height is arbitrary up to the precision of the register.
+        */
+       return intel_cursor_size_ok(plane_state) && IS_ALIGNED(width, 64);
+}
+
+static int i845_check_cursor(struct intel_plane *plane,
+                            struct intel_crtc_state *crtc_state,
+                            struct intel_plane_state *plane_state)
+{
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       int ret;
+
+       ret = intel_check_cursor(crtc_state, plane_state);
+       if (ret)
+               return ret;
+
+       /* if we want to turn off the cursor ignore width and height */
+       if (!fb)
+               return 0;
+
+       /* Check for which cursor types we support */
+       if (!i845_cursor_size_ok(plane_state)) {
+               DRM_DEBUG("Cursor dimension %dx%d not supported\n",
+                         plane_state->base.crtc_w,
+                         plane_state->base.crtc_h);
+               return -EINVAL;
+       }
+
+       switch (fb->pitches[0]) {
        case 256:
        case 512:
        case 1024:
        case 2048:
                break;
+       default:
+               DRM_DEBUG_KMS("Invalid cursor stride (%u)\n",
+                             fb->pitches[0]);
+               return -EINVAL;
        }
 
-       return CURSOR_ENABLE |
-               CURSOR_GAMMA_ENABLE |
-               CURSOR_FORMAT_ARGB |
-               CURSOR_STRIDE(stride);
+       plane_state->ctl = i845_cursor_ctl(crtc_state, plane_state);
+
+       return 0;
 }
 
-static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
+static void i845_update_cursor(struct intel_plane *plane,
+                              const struct intel_crtc_state *crtc_state,
                               const struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       uint32_t cntl = 0, size = 0;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       u32 cntl = 0, base = 0, pos = 0, size = 0;
+       unsigned long irqflags;
 
        if (plane_state && plane_state->base.visible) {
                unsigned int width = plane_state->base.crtc_w;
@@ -9177,35 +9316,41 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 
                cntl = plane_state->ctl;
                size = (height << 12) | width;
-       }
 
-       if (intel_crtc->cursor_cntl != 0 &&
-           (intel_crtc->cursor_base != base ||
-            intel_crtc->cursor_size != size ||
-            intel_crtc->cursor_cntl != cntl)) {
-               /* On these chipsets we can only modify the base/size/stride
-                * whilst the cursor is disabled.
-                */
-               I915_WRITE_FW(CURCNTR(PIPE_A), 0);
-               POSTING_READ_FW(CURCNTR(PIPE_A));
-               intel_crtc->cursor_cntl = 0;
+               base = intel_cursor_base(plane_state);
+               pos = intel_cursor_position(plane_state);
        }
 
-       if (intel_crtc->cursor_base != base) {
-               I915_WRITE_FW(CURBASE(PIPE_A), base);
-               intel_crtc->cursor_base = base;
-       }
+       spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
-       if (intel_crtc->cursor_size != size) {
+       /* On these chipsets we can only modify the base/size/stride
+        * whilst the cursor is disabled.
+        */
+       if (plane->cursor.base != base ||
+           plane->cursor.size != size ||
+           plane->cursor.cntl != cntl) {
+               I915_WRITE_FW(CURCNTR(PIPE_A), 0);
+               I915_WRITE_FW(CURBASE(PIPE_A), base);
                I915_WRITE_FW(CURSIZE, size);
-               intel_crtc->cursor_size = size;
-       }
-
-       if (intel_crtc->cursor_cntl != cntl) {
+               I915_WRITE_FW(CURPOS(PIPE_A), pos);
                I915_WRITE_FW(CURCNTR(PIPE_A), cntl);
-               POSTING_READ_FW(CURCNTR(PIPE_A));
-               intel_crtc->cursor_cntl = cntl;
+
+               plane->cursor.base = base;
+               plane->cursor.size = size;
+               plane->cursor.cntl = cntl;
+       } else {
+               I915_WRITE_FW(CURPOS(PIPE_A), pos);
        }
+
+       POSTING_READ_FW(CURCNTR(PIPE_A));
+
+       spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+static void i845_disable_cursor(struct intel_plane *plane,
+                               struct intel_crtc *crtc)
+{
+       i845_update_cursor(plane, NULL, NULL);
 }
 
 static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
@@ -9214,7 +9359,6 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
        struct drm_i915_private *dev_priv =
                to_i915(plane_state->base.plane->dev);
        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-       enum pipe pipe = crtc->pipe;
        u32 cntl;
 
        cntl = MCURSOR_GAMMA_ENABLE;
@@ -9222,7 +9366,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
        if (HAS_DDI(dev_priv))
                cntl |= CURSOR_PIPE_CSC_ENABLE;
 
-       cntl |= pipe << 28; /* Connect to correct pipe */
+       cntl |= MCURSOR_PIPE_SELECT(crtc->pipe);
 
        switch (plane_state->base.crtc_w) {
        case 64:
@@ -9245,116 +9389,154 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
        return cntl;
 }
 
-static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base,
-                              const struct intel_plane_state *plane_state)
+static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       int pipe = intel_crtc->pipe;
-       uint32_t cntl = 0;
+       struct drm_i915_private *dev_priv =
+               to_i915(plane_state->base.plane->dev);
+       int width = plane_state->base.crtc_w;
+       int height = plane_state->base.crtc_h;
 
-       if (plane_state && plane_state->base.visible)
-               cntl = plane_state->ctl;
+       if (!intel_cursor_size_ok(plane_state))
+               return false;
 
-       if (intel_crtc->cursor_cntl != cntl) {
-               I915_WRITE_FW(CURCNTR(pipe), cntl);
-               POSTING_READ_FW(CURCNTR(pipe));
-               intel_crtc->cursor_cntl = cntl;
+       /* Cursor width is limited to a few power-of-two sizes */
+       switch (width) {
+       case 256:
+       case 128:
+       case 64:
+               break;
+       default:
+               return false;
        }
 
-       /* and commit changes on next vblank */
-       I915_WRITE_FW(CURBASE(pipe), base);
-       POSTING_READ_FW(CURBASE(pipe));
+       /*
+        * IVB+ have CUR_FBC_CTL which allows an arbitrary cursor
+        * height from 8 lines up to the cursor width, when the
+        * cursor is not rotated. Everything else requires square
+        * cursors.
+        */
+       if (HAS_CUR_FBC(dev_priv) &&
+           plane_state->base.rotation & DRM_MODE_ROTATE_0) {
+               if (height < 8 || height > width)
+                       return false;
+       } else {
+               if (height != width)
+                       return false;
+       }
 
-       intel_crtc->cursor_base = base;
+       return true;
 }
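On top of the generic intel_cursor_size_ok() bounds check, i9xx_cursor_size_ok() above encodes the pre-SKL cursor restrictions: the width must be 64, 128 or 256, and the height must either equal the width or, on hardware with CUR_FBC_CTL and an unrotated cursor, fall anywhere between 8 lines and the width. A standalone sketch of that rule (has_cur_fbc and rotated stand in for HAS_CUR_FBC() and the DRM_MODE_ROTATE_0 test):

/* Illustrative sketch only, not part of this patch. */
#include <stdbool.h>

static bool sketch_i9xx_cursor_size_ok(int width, int height,
                                       bool has_cur_fbc, bool rotated)
{
        /* Cursor width is limited to a few power-of-two sizes */
        if (width != 64 && width != 128 && width != 256)
                return false;

        /* CUR_FBC_CTL allows any height from 8 lines up to the width */
        if (has_cur_fbc && !rotated)
                return height >= 8 && height <= width;

        /* Everything else requires square cursors */
        return height == width;
}

Under this rule a 256x64 cursor is only accepted on CUR_FBC-capable hardware with an unrotated cursor; everything else needs the full 256x256.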
 
-/* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */
-static void intel_crtc_update_cursor(struct drm_crtc *crtc,
-                                    const struct intel_plane_state *plane_state)
+static int i9xx_check_cursor(struct intel_plane *plane,
+                            struct intel_crtc_state *crtc_state,
+                            struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       int pipe = intel_crtc->pipe;
-       u32 base = intel_crtc->cursor_addr;
-       unsigned long irqflags;
-       u32 pos = 0;
-
-       if (plane_state) {
-               int x = plane_state->base.crtc_x;
-               int y = plane_state->base.crtc_y;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       enum pipe pipe = plane->pipe;
+       int ret;
 
-               if (x < 0) {
-                       pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
-                       x = -x;
-               }
-               pos |= x << CURSOR_X_SHIFT;
+       ret = intel_check_cursor(crtc_state, plane_state);
+       if (ret)
+               return ret;
 
-               if (y < 0) {
-                       pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
-                       y = -y;
-               }
-               pos |= y << CURSOR_Y_SHIFT;
+       /* if we want to turn off the cursor ignore width and height */
+       if (!fb)
+               return 0;
 
-               /* ILK+ do this automagically */
-               if (HAS_GMCH_DISPLAY(dev_priv) &&
-                   plane_state->base.rotation & DRM_MODE_ROTATE_180) {
-                       base += (plane_state->base.crtc_h *
-                                plane_state->base.crtc_w - 1) * 4;
-               }
+       /* Check for which cursor types we support */
+       if (!i9xx_cursor_size_ok(plane_state)) {
+               DRM_DEBUG("Cursor dimension %dx%d not supported\n",
+                         plane_state->base.crtc_w,
+                         plane_state->base.crtc_h);
+               return -EINVAL;
        }
 
-       spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+       if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) {
+               DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n",
+                             fb->pitches[0], plane_state->base.crtc_w);
+               return -EINVAL;
+       }
 
-       I915_WRITE_FW(CURPOS(pipe), pos);
+       /*
+        * There's something wrong with the cursor on CHV pipe C.
+        * If it straddles the left edge of the screen then
+        * moving it away from the edge or disabling it often
+        * results in a pipe underrun, and often that can lead to
+        * dead pipe (constant underrun reported, and it scans
+        * out just a solid color). To recover from that, the
+        * display power well must be turned off and on again.
+        * Refuse to put the cursor into that compromised position.
+        */
+       if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
+           plane_state->base.visible && plane_state->base.crtc_x < 0) {
+               DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
+               return -EINVAL;
+       }
 
-       if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-               i845_update_cursor(crtc, base, plane_state);
-       else
-               i9xx_update_cursor(crtc, base, plane_state);
+       plane_state->ctl = i9xx_cursor_ctl(crtc_state, plane_state);
 
-       spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+       return 0;
 }
 
-static bool cursor_size_ok(struct drm_i915_private *dev_priv,
-                          uint32_t width, uint32_t height)
+static void i9xx_update_cursor(struct intel_plane *plane,
+                              const struct intel_crtc_state *crtc_state,
+                              const struct intel_plane_state *plane_state)
 {
-       if (width == 0 || height == 0)
-               return false;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       enum pipe pipe = plane->pipe;
+       u32 cntl = 0, base = 0, pos = 0, fbc_ctl = 0;
+       unsigned long irqflags;
 
-       /*
-        * 845g/865g are special in that they are only limited by
-        * the width of their cursors, the height is arbitrary up to
-        * the precision of the register. Everything else requires
-        * square cursors, limited to a few power-of-two sizes.
-        */
-       if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
-               if ((width & 63) != 0)
-                       return false;
+       if (plane_state && plane_state->base.visible) {
+               cntl = plane_state->ctl;
 
-               if (width > (IS_I845G(dev_priv) ? 64 : 512))
-                       return false;
+               if (plane_state->base.crtc_h != plane_state->base.crtc_w)
+                       fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1);
 
-               if (height > 1023)
-                       return false;
+               base = intel_cursor_base(plane_state);
+               pos = intel_cursor_position(plane_state);
+       }
+
+       spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+
+       /*
+        * On some platforms writing CURCNTR first will also
+        * cause CURPOS to be armed by the CURBASE write.
+        * Without the CURCNTR write the CURPOS write would
+        * arm itself.
+        *
+        * CURCNTR and CUR_FBC_CTL are always
+        * armed by the CURBASE write only.
+        */
+       if (plane->cursor.base != base ||
+           plane->cursor.size != fbc_ctl ||
+           plane->cursor.cntl != cntl) {
+               I915_WRITE_FW(CURCNTR(pipe), cntl);
+               if (HAS_CUR_FBC(dev_priv))
+                       I915_WRITE_FW(CUR_FBC_CTL(pipe), fbc_ctl);
+               I915_WRITE_FW(CURPOS(pipe), pos);
+               I915_WRITE_FW(CURBASE(pipe), base);
+
+               plane->cursor.base = base;
+               plane->cursor.size = fbc_ctl;
+               plane->cursor.cntl = cntl;
        } else {
-               switch (width | height) {
-               case 256:
-               case 128:
-                       if (IS_GEN2(dev_priv))
-                               return false;
-               case 64:
-                       break;
-               default:
-                       return false;
-               }
+               I915_WRITE_FW(CURPOS(pipe), pos);
        }
 
-       return true;
+       POSTING_READ_FW(CURBASE(pipe));
+
+       spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+static void i9xx_disable_cursor(struct intel_plane *plane,
+                               struct intel_crtc *crtc)
+{
+       i9xx_update_cursor(plane, NULL, NULL);
 }
 
+
 /* VESA 640x480x72Hz mode to set on the pipe */
 static struct drm_display_mode load_detect_mode = {
        DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 31500, 640, 664,
@@ -9566,6 +9748,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector,
         */
        if (!crtc) {
                DRM_DEBUG_KMS("no pipe available for load-detect\n");
+               ret = -ENODEV;
                goto fail;
        }
 
@@ -9622,6 +9805,7 @@ found:
                DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
        if (IS_ERR(fb)) {
                DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n");
+               ret = PTR_ERR(fb);
                goto fail;
        }
 
@@ -10853,21 +11037,21 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
                         turn_off, turn_on, mode_changed);
 
        if (turn_on) {
-               if (INTEL_GEN(dev_priv) < 5)
+               if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
                        pipe_config->update_wm_pre = true;
 
                /* must disable cxsr around plane enable/disable */
                if (plane->id != PLANE_CURSOR)
                        pipe_config->disable_cxsr = true;
        } else if (turn_off) {
-               if (INTEL_GEN(dev_priv) < 5)
+               if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
                        pipe_config->update_wm_post = true;
 
                /* must disable cxsr around plane enable/disable */
                if (plane->id != PLANE_CURSOR)
                        pipe_config->disable_cxsr = true;
        } else if (intel_wm_need_update(&plane->base, plane_state)) {
-               if (INTEL_GEN(dev_priv) < 5) {
+               if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) {
                        /* FIXME bollocks */
                        pipe_config->update_wm_pre = true;
                        pipe_config->update_wm_post = true;
@@ -11291,7 +11475,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
        shared_dpll = crtc_state->shared_dpll;
        dpll_hw_state = crtc_state->dpll_hw_state;
        force_thru = crtc_state->pch_pfit.force_thru;
-       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+       if (IS_G4X(dev_priv) ||
+           IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
                wm_state = crtc_state->wm;
 
        /* Keep base drm_crtc_state intact, only clear our extended struct */
@@ -11303,7 +11488,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
        crtc_state->shared_dpll = shared_dpll;
        crtc_state->dpll_hw_state = dpll_hw_state;
        crtc_state->pch_pfit.force_thru = force_thru;
-       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+       if (IS_G4X(dev_priv) ||
+           IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
                crtc_state->wm = wm_state;
 }
 
@@ -11865,7 +12051,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
         * allocation. In that case since the ddb allocation will be updated
         * once the plane becomes visible, we can skip this check
         */
-       if (intel_crtc->cursor_addr) {
+       if (1) {
                hw_plane_wm = &hw_wm.planes[PLANE_CURSOR];
                sw_plane_wm = &sw_wm->planes[PLANE_CURSOR];
 
@@ -11921,11 +12107,15 @@ verify_connector_state(struct drm_device *dev,
 
        for_each_new_connector_in_state(state, connector, new_conn_state, i) {
                struct drm_encoder *encoder = connector->encoder;
+               struct drm_crtc_state *crtc_state = NULL;
 
                if (new_conn_state->crtc != crtc)
                        continue;
 
-               intel_connector_verify_state(to_intel_connector(connector));
+               if (crtc)
+                       crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
+
+               intel_connector_verify_state(crtc_state, new_conn_state);
 
                I915_STATE_WARN(new_conn_state->best_encoder != encoder,
                     "connector's atomic encoder doesn't match legacy encoder\n");
@@ -12043,7 +12233,7 @@ verify_crtc_state(struct drm_crtc *crtc,
 
        intel_pipe_config_sanity_check(dev_priv, pipe_config);
 
-       sw_config = to_intel_crtc_state(crtc->state);
+       sw_config = to_intel_crtc_state(new_crtc_state);
        if (!intel_pipe_config_compare(dev_priv, sw_config,
                                       pipe_config, false)) {
                I915_STATE_WARN(1, "pipe state doesn't match!\n");
@@ -13139,7 +13329,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
        if (obj) {
                if (plane->type == DRM_PLANE_TYPE_CURSOR &&
                    INTEL_INFO(dev_priv)->cursor_needs_physical) {
-                       const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+                       const int align = intel_cursor_alignment(dev_priv);
 
                        ret = i915_gem_object_attach_phys(obj, align);
                        if (ret) {
@@ -13269,11 +13459,11 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state
 }
 
 static int
-intel_check_primary_plane(struct drm_plane *plane,
+intel_check_primary_plane(struct intel_plane *plane,
                          struct intel_crtc_state *crtc_state,
                          struct intel_plane_state *state)
 {
-       struct drm_i915_private *dev_priv = to_i915(plane->dev);
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
        struct drm_crtc *crtc = state->base.crtc;
        int min_scale = DRM_PLANE_HELPER_NO_SCALING;
        int max_scale = DRM_PLANE_HELPER_NO_SCALING;
@@ -13452,7 +13642,7 @@ intel_legacy_cursor_update(struct drm_plane *plane,
                goto out_free;
 
        if (INTEL_INFO(dev_priv)->cursor_needs_physical) {
-               int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+               int align = intel_cursor_alignment(dev_priv);
 
                ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align);
                if (ret) {
@@ -13488,12 +13678,12 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 
        if (plane->state->visible) {
                trace_intel_update_plane(plane, to_intel_crtc(crtc));
-               intel_plane->update_plane(plane,
+               intel_plane->update_plane(intel_plane,
                                          to_intel_crtc_state(crtc->state),
                                          to_intel_plane_state(plane->state));
        } else {
                trace_intel_disable_plane(plane, to_intel_crtc(crtc));
-               intel_plane->disable_plane(plane, crtc);
+               intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
        }
 
        intel_cleanup_plane_fb(plane, new_plane_state);
@@ -13636,107 +13826,9 @@ fail:
        return ERR_PTR(ret);
 }
 
-static int
-intel_check_cursor_plane(struct drm_plane *plane,
-                        struct intel_crtc_state *crtc_state,
-                        struct intel_plane_state *state)
-{
-       struct drm_i915_private *dev_priv = to_i915(plane->dev);
-       struct drm_framebuffer *fb = state->base.fb;
-       struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-       enum pipe pipe = to_intel_plane(plane)->pipe;
-       unsigned stride;
-       int ret;
-
-       ret = drm_plane_helper_check_state(&state->base,
-                                          &state->clip,
-                                          DRM_PLANE_HELPER_NO_SCALING,
-                                          DRM_PLANE_HELPER_NO_SCALING,
-                                          true, true);
-       if (ret)
-               return ret;
-
-       /* if we want to turn off the cursor ignore width and height */
-       if (!obj)
-               return 0;
-
-       /* Check for which cursor types we support */
-       if (!cursor_size_ok(dev_priv, state->base.crtc_w,
-                           state->base.crtc_h)) {
-               DRM_DEBUG("Cursor dimension %dx%d not supported\n",
-                         state->base.crtc_w, state->base.crtc_h);
-               return -EINVAL;
-       }
-
-       stride = roundup_pow_of_two(state->base.crtc_w) * 4;
-       if (obj->base.size < stride * state->base.crtc_h) {
-               DRM_DEBUG_KMS("buffer is too small\n");
-               return -ENOMEM;
-       }
-
-       if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
-               DRM_DEBUG_KMS("cursor cannot be tiled\n");
-               return -EINVAL;
-       }
-
-       /*
-        * There's something wrong with the cursor on CHV pipe C.
-        * If it straddles the left edge of the screen then
-        * moving it away from the edge or disabling it often
-        * results in a pipe underrun, and often that can lead to
-        * dead pipe (constant underrun reported, and it scans
-        * out just a solid color). To recover from that, the
-        * display power well must be turned off and on again.
-        * Refuse the put the cursor into that compromised position.
-        */
-       if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
-           state->base.visible && state->base.crtc_x < 0) {
-               DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
-               return -EINVAL;
-       }
-
-       if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-               state->ctl = i845_cursor_ctl(crtc_state, state);
-       else
-               state->ctl = i9xx_cursor_ctl(crtc_state, state);
-
-       return 0;
-}
-
-static void
-intel_disable_cursor_plane(struct drm_plane *plane,
-                          struct drm_crtc *crtc)
-{
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-
-       intel_crtc->cursor_addr = 0;
-       intel_crtc_update_cursor(crtc, NULL);
-}
-
-static void
-intel_update_cursor_plane(struct drm_plane *plane,
-                         const struct intel_crtc_state *crtc_state,
-                         const struct intel_plane_state *state)
-{
-       struct drm_crtc *crtc = crtc_state->base.crtc;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct drm_i915_private *dev_priv = to_i915(plane->dev);
-       struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb);
-       uint32_t addr;
-
-       if (!obj)
-               addr = 0;
-       else if (!INTEL_INFO(dev_priv)->cursor_needs_physical)
-               addr = intel_plane_ggtt_offset(state);
-       else
-               addr = obj->phys_handle->busaddr;
-
-       intel_crtc->cursor_addr = addr;
-       intel_crtc_update_cursor(crtc, state);
-}
-
 static struct intel_plane *
-intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
+intel_cursor_plane_create(struct drm_i915_private *dev_priv,
+                         enum pipe pipe)
 {
        struct intel_plane *cursor = NULL;
        struct intel_plane_state *state = NULL;
@@ -13762,9 +13854,22 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
        cursor->plane = pipe;
        cursor->id = PLANE_CURSOR;
        cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe);
-       cursor->check_plane = intel_check_cursor_plane;
-       cursor->update_plane = intel_update_cursor_plane;
-       cursor->disable_plane = intel_disable_cursor_plane;
+
+       if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
+               cursor->update_plane = i845_update_cursor;
+               cursor->disable_plane = i845_disable_cursor;
+               cursor->check_plane = i845_check_cursor;
+       } else {
+               cursor->update_plane = i9xx_update_cursor;
+               cursor->disable_plane = i9xx_disable_cursor;
+               cursor->check_plane = i9xx_check_cursor;
+       }
+
+       cursor->cursor.base = ~0;
+       cursor->cursor.cntl = ~0;
+
+       if (IS_I845G(dev_priv) || IS_I865G(dev_priv) || HAS_CUR_FBC(dev_priv))
+               cursor->cursor.size = ~0;
 
        ret = drm_universal_plane_init(&dev_priv->drm, &cursor->base,
                                       0, &intel_cursor_plane_funcs,
@@ -13873,10 +13978,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
        intel_crtc->pipe = pipe;
        intel_crtc->plane = primary->plane;
 
-       intel_crtc->cursor_base = ~0;
-       intel_crtc->cursor_cntl = ~0;
-       intel_crtc->cursor_size = ~0;
-
        /* initialize shared scalers */
        intel_crtc_init_scalers(intel_crtc, crtc_state);
 
@@ -14416,7 +14517,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
        case DRM_FORMAT_UYVY:
        case DRM_FORMAT_YVYU:
        case DRM_FORMAT_VYUY:
-               if (INTEL_GEN(dev_priv) < 5) {
+               if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) {
                        DRM_DEBUG_KMS("unsupported pixel format: %s\n",
                                      drm_get_format_name(mode_cmd->pixel_format, &format_name));
                        goto err;
@@ -14928,6 +15029,7 @@ int intel_modeset_init(struct drm_device *dev)
 
        dev->mode_config.funcs = &intel_mode_funcs;
 
+       init_llist_head(&dev_priv->atomic_helper.free_list);
        INIT_WORK(&dev_priv->atomic_helper.free_work,
                  intel_atomic_helper_free_state_worker);
 
@@ -15149,7 +15251,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                                continue;
 
                        trace_intel_disable_plane(&plane->base, crtc);
-                       plane->disable_plane(&plane->base, &crtc->base);
+                       plane->disable_plane(plane, crtc);
                }
        }
 
@@ -15520,7 +15622,10 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
                pll->on = false;
        }
 
-       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+       if (IS_G4X(dev_priv)) {
+               g4x_wm_get_hw_state(dev);
+               g4x_wm_sanitize(dev_priv);
+       } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
                vlv_wm_get_hw_state(dev);
                vlv_wm_sanitize(dev_priv);
        } else if (IS_GEN9(dev_priv)) {
@@ -15554,13 +15659,6 @@ void intel_display_resume(struct drm_device *dev)
        if (state)
                state->acquire_ctx = &ctx;
 
-       /*
-        * This is a cludge because with real atomic modeset mode_config.mutex
-        * won't be taken. Unfortunately some probed state like
-        * audio_codec_enable is still protected by mode_config.mutex, so lock
-        * it here for now.
-        */
-       mutex_lock(&dev->mode_config.mutex);
        drm_modeset_acquire_init(&ctx, 0);
 
        while (1) {
@@ -15576,7 +15674,6 @@ void intel_display_resume(struct drm_device *dev)
 
        drm_modeset_drop_locks(&ctx);
        drm_modeset_acquire_fini(&ctx);
-       mutex_unlock(&dev->mode_config.mutex);
 
        if (ret)
                DRM_ERROR("Restoring old state failed with %i\n", ret);
index ee77b519835c5fd9d8c582a9c3169b43d06ebab6..4a6feb6a69bd790322f244aeceef306284faf3cb 100644 (file)
@@ -133,36 +133,55 @@ static void vlv_steal_power_sequencer(struct drm_device *dev,
                                      enum pipe pipe);
 static void intel_dp_unset_edid(struct intel_dp *intel_dp);
 
-static int
-intel_dp_max_link_bw(struct intel_dp  *intel_dp)
+static int intel_dp_num_rates(u8 link_bw_code)
 {
-       int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE];
-
-       switch (max_link_bw) {
+       switch (link_bw_code) {
+       default:
+               WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
+                    link_bw_code);
        case DP_LINK_BW_1_62:
+               return 1;
        case DP_LINK_BW_2_7:
+               return 2;
        case DP_LINK_BW_5_4:
-               break;
-       default:
-               WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
-                    max_link_bw);
-               max_link_bw = DP_LINK_BW_1_62;
-               break;
+               return 3;
        }
-       return max_link_bw;
 }
 
-static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp)
+/* update sink rates from dpcd */
+static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
 {
-       struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-       u8 source_max, sink_max;
+       int i, num_rates;
+
+       num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]);
+
+       for (i = 0; i < num_rates; i++)
+               intel_dp->sink_rates[i] = default_rates[i];
 
-       source_max = intel_dig_port->max_lanes;
-       sink_max = intel_dp->max_sink_lane_count;
+       intel_dp->num_sink_rates = num_rates;
+}
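For sinks that do not report an explicit rate table, intel_dp_set_sink_rates() above derives sink_rates[] from the DPCD max link rate code: 1.62, 2.7 and 5.4 Gbps map to the first one, two or three entries of the driver's default_rates table. A standalone sketch of that mapping (rates in kHz; the DP_LINK_BW_* values come from the DisplayPort spec, the rest is illustrative):

/* Illustrative sketch only, not part of this patch. */

/* assumed to mirror the driver's default_rates[] (link rates in kHz) */
static const int sketch_default_rates[] = { 162000, 270000, 540000 };

/* DP_LINK_BW_* codes from the DisplayPort spec */
enum { SKETCH_BW_1_62 = 0x06, SKETCH_BW_2_7 = 0x0a, SKETCH_BW_5_4 = 0x14 };

static int sketch_num_rates(unsigned char link_bw_code)
{
        switch (link_bw_code) {
        case SKETCH_BW_1_62: return 1;
        case SKETCH_BW_2_7:  return 2;
        case SKETCH_BW_5_4:  return 3;
        default:             return 1; /* unknown code: fall back to 1.62 Gbps only */
        }
}

static int sketch_set_sink_rates(unsigned char max_link_rate_code, int *sink_rates)
{
        int i, n = sketch_num_rates(max_link_rate_code);

        for (i = 0; i < n; i++)
                sink_rates[i] = sketch_default_rates[i];

        return n; /* e.g. SKETCH_BW_2_7 -> { 162000, 270000 } */
}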
+
+/* Theoretical max between source and sink */
+static int intel_dp_max_common_rate(struct intel_dp *intel_dp)
+{
+       return intel_dp->common_rates[intel_dp->num_common_rates - 1];
+}
+
+/* Theoretical max between source and sink */
+static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp)
+{
+       struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+       int source_max = intel_dig_port->max_lanes;
+       int sink_max = drm_dp_max_lane_count(intel_dp->dpcd);
 
        return min(source_max, sink_max);
 }
 
+int intel_dp_max_lane_count(struct intel_dp *intel_dp)
+{
+       return intel_dp->max_link_lane_count;
+}
+
 int
 intel_dp_link_required(int pixel_clock, int bpp)
 {
@@ -205,34 +224,25 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp)
        return max_dotclk;
 }
 
-static int
-intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates)
-{
-       if (intel_dp->num_sink_rates) {
-               *sink_rates = intel_dp->sink_rates;
-               return intel_dp->num_sink_rates;
-       }
-
-       *sink_rates = default_rates;
-
-       return (intel_dp->max_sink_link_bw >> 3) + 1;
-}
-
-static int
-intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
+static void
+intel_dp_set_source_rates(struct intel_dp *intel_dp)
 {
        struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
        struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+       const int *source_rates;
        int size;
 
+       /* This should only be done once */
+       WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates);
+
        if (IS_GEN9_LP(dev_priv)) {
-               *source_rates = bxt_rates;
+               source_rates = bxt_rates;
                size = ARRAY_SIZE(bxt_rates);
        } else if (IS_GEN9_BC(dev_priv)) {
-               *source_rates = skl_rates;
+               source_rates = skl_rates;
                size = ARRAY_SIZE(skl_rates);
        } else {
-               *source_rates = default_rates;
+               source_rates = default_rates;
                size = ARRAY_SIZE(default_rates);
        }
 
@@ -240,7 +250,8 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
        if (!intel_dp_source_supports_hbr2(intel_dp))
                size--;
 
-       return size;
+       intel_dp->source_rates = source_rates;
+       intel_dp->num_source_rates = size;
 }
 
 static int intersect_rates(const int *source_rates, int source_len,
@@ -266,50 +277,83 @@ static int intersect_rates(const int *source_rates, int source_len,
        return k;
 }
 
-static int intel_dp_common_rates(struct intel_dp *intel_dp,
-                                int *common_rates)
+/* return index of rate in rates array, or -1 if not found */
+static int intel_dp_rate_index(const int *rates, int len, int rate)
 {
-       const int *source_rates, *sink_rates;
-       int source_len, sink_len;
+       int i;
 
-       sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-       source_len = intel_dp_source_rates(intel_dp, &source_rates);
+       for (i = 0; i < len; i++)
+               if (rate == rates[i])
+                       return i;
 
-       return intersect_rates(source_rates, source_len,
-                              sink_rates, sink_len,
-                              common_rates);
+       return -1;
 }
 
-static int intel_dp_link_rate_index(struct intel_dp *intel_dp,
-                                   int *common_rates, int link_rate)
+static void intel_dp_set_common_rates(struct intel_dp *intel_dp)
 {
-       int common_len;
-       int index;
+       WARN_ON(!intel_dp->num_source_rates || !intel_dp->num_sink_rates);
+
+       intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates,
+                                                    intel_dp->num_source_rates,
+                                                    intel_dp->sink_rates,
+                                                    intel_dp->num_sink_rates,
+                                                    intel_dp->common_rates);
 
-       common_len = intel_dp_common_rates(intel_dp, common_rates);
-       for (index = 0; index < common_len; index++) {
-               if (link_rate == common_rates[common_len - index - 1])
-                       return common_len - index - 1;
+       /* Paranoia, there should always be something in common. */
+       if (WARN_ON(intel_dp->num_common_rates == 0)) {
+               intel_dp->common_rates[0] = default_rates[0];
+               intel_dp->num_common_rates = 1;
        }
+}
 
-       return -1;
+/* get length of common rates potentially limited by max_rate */
+static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp,
+                                         int max_rate)
+{
+       const int *common_rates = intel_dp->common_rates;
+       int i, common_len = intel_dp->num_common_rates;
+
+       /* Limit results by potentially reduced max rate */
+       for (i = 0; i < common_len; i++) {
+               if (common_rates[common_len - i - 1] <= max_rate)
+                       return common_len - i;
+       }
+
+       return 0;
+}
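intel_dp_common_len_rate_limit() above walks the ascending common_rates[] from the top and returns how many entries are at or below the current maximum, which lets the link-training fallback shrink the usable rate set without recomputing the source/sink intersection. A standalone sketch with a worked example:

/* Illustrative sketch only, not part of this patch. */

/* common_rates[] is sorted ascending, as in the driver */
static int sketch_common_len_rate_limit(const int *common_rates,
                                        int common_len, int max_rate)
{
        int i;

        for (i = 0; i < common_len; i++) {
                if (common_rates[common_len - i - 1] <= max_rate)
                        return common_len - i;
        }

        return 0;
}

/*
 * Example: with common_rates = { 162000, 270000, 540000 } and a fallback
 * max_rate of 270000 this returns 2, so only 1.62 and 2.7 Gbps remain usable.
 */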
+
+static bool intel_dp_link_params_valid(struct intel_dp *intel_dp)
+{
+       /*
+        * FIXME: we need to synchronize the current link parameters with
+        * hardware readout. Currently fast link training doesn't work on
+        * boot-up.
+        */
+       if (intel_dp->link_rate == 0 ||
+           intel_dp->link_rate > intel_dp->max_link_rate)
+               return false;
+
+       if (intel_dp->lane_count == 0 ||
+           intel_dp->lane_count > intel_dp_max_lane_count(intel_dp))
+               return false;
+
+       return true;
 }
 
 int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
                                            int link_rate, uint8_t lane_count)
 {
-       int common_rates[DP_MAX_SUPPORTED_RATES];
-       int link_rate_index;
+       int index;
 
-       link_rate_index = intel_dp_link_rate_index(intel_dp,
-                                                  common_rates,
-                                                  link_rate);
-       if (link_rate_index > 0) {
-               intel_dp->max_sink_link_bw = drm_dp_link_rate_to_bw_code(common_rates[link_rate_index - 1]);
-               intel_dp->max_sink_lane_count = lane_count;
+       index = intel_dp_rate_index(intel_dp->common_rates,
+                                   intel_dp->num_common_rates,
+                                   link_rate);
+       if (index > 0) {
+               intel_dp->max_link_rate = intel_dp->common_rates[index - 1];
+               intel_dp->max_link_lane_count = lane_count;
        } else if (lane_count > 1) {
-               intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp);
-               intel_dp->max_sink_lane_count = lane_count >> 1;
+               intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
+               intel_dp->max_link_lane_count = lane_count >> 1;
        } else {
                DRM_ERROR("Link Training Unsuccessful\n");
                return -1;
@@ -1486,24 +1530,21 @@ static void snprintf_int_array(char *str, size_t len,
 
 static void intel_dp_print_rates(struct intel_dp *intel_dp)
 {
-       const int *source_rates, *sink_rates;
-       int source_len, sink_len, common_len;
-       int common_rates[DP_MAX_SUPPORTED_RATES];
        char str[128]; /* FIXME: too big for stack? */
 
        if ((drm_debug & DRM_UT_KMS) == 0)
                return;
 
-       source_len = intel_dp_source_rates(intel_dp, &source_rates);
-       snprintf_int_array(str, sizeof(str), source_rates, source_len);
+       snprintf_int_array(str, sizeof(str),
+                          intel_dp->source_rates, intel_dp->num_source_rates);
        DRM_DEBUG_KMS("source rates: %s\n", str);
 
-       sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-       snprintf_int_array(str, sizeof(str), sink_rates, sink_len);
+       snprintf_int_array(str, sizeof(str),
+                          intel_dp->sink_rates, intel_dp->num_sink_rates);
        DRM_DEBUG_KMS("sink rates: %s\n", str);
 
-       common_len = intel_dp_common_rates(intel_dp, common_rates);
-       snprintf_int_array(str, sizeof(str), common_rates, common_len);
+       snprintf_int_array(str, sizeof(str),
+                          intel_dp->common_rates, intel_dp->num_common_rates);
        DRM_DEBUG_KMS("common rates: %s\n", str);
 }
 
@@ -1538,39 +1579,34 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp)
        return true;
 }
 
-static int rate_to_index(int find, const int *rates)
-{
-       int i = 0;
-
-       for (i = 0; i < DP_MAX_SUPPORTED_RATES; ++i)
-               if (find == rates[i])
-                       break;
-
-       return i;
-}
-
 int
 intel_dp_max_link_rate(struct intel_dp *intel_dp)
 {
-       int rates[DP_MAX_SUPPORTED_RATES] = {};
        int len;
 
-       len = intel_dp_common_rates(intel_dp, rates);
+       len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate);
        if (WARN_ON(len <= 0))
                return 162000;
 
-       return rates[len - 1];
+       return intel_dp->common_rates[len - 1];
 }
 
 int intel_dp_rate_select(struct intel_dp *intel_dp, int rate)
 {
-       return rate_to_index(rate, intel_dp->sink_rates);
+       int i = intel_dp_rate_index(intel_dp->sink_rates,
+                                   intel_dp->num_sink_rates, rate);
+
+       if (WARN_ON(i < 0))
+               i = 0;
+
+       return i;
 }
 
 void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
                           uint8_t *link_bw, uint8_t *rate_select)
 {
-       if (intel_dp->num_sink_rates) {
+       /* eDP 1.4 rate select method. */
+       if (intel_dp->use_rate_select) {
                *link_bw = 0;
                *rate_select =
                        intel_dp_rate_select(intel_dp, port_clock);
@@ -1618,14 +1654,13 @@ intel_dp_compute_config(struct intel_encoder *encoder,
        /* Conveniently, the link BW constants become indices with a shift...*/
        int min_clock = 0;
        int max_clock;
-       int link_rate_index;
        int bpp, mode_rate;
        int link_avail, link_clock;
-       int common_rates[DP_MAX_SUPPORTED_RATES] = {};
        int common_len;
        uint8_t link_bw, rate_select;
 
-       common_len = intel_dp_common_rates(intel_dp, common_rates);
+       common_len = intel_dp_common_len_rate_limit(intel_dp,
+                                                   intel_dp->max_link_rate);
 
        /* No common link rates between source and sink */
        WARN_ON(common_len <= 0);
@@ -1662,16 +1697,18 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 
        /* Use values requested by Compliance Test Request */
        if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
-               link_rate_index = intel_dp_link_rate_index(intel_dp,
-                                                          common_rates,
-                                                          intel_dp->compliance.test_link_rate);
-               if (link_rate_index >= 0)
-                       min_clock = max_clock = link_rate_index;
+               int index;
+
+               index = intel_dp_rate_index(intel_dp->common_rates,
+                                           intel_dp->num_common_rates,
+                                           intel_dp->compliance.test_link_rate);
+               if (index >= 0)
+                       min_clock = max_clock = index;
                min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count;
        }
        DRM_DEBUG_KMS("DP link computation with max lane count %i "
                      "max bw %d pixel clock %iKHz\n",
-                     max_lane_count, common_rates[max_clock],
+                     max_lane_count, intel_dp->common_rates[max_clock],
                      adjusted_mode->crtc_clock);
 
        /* Walk through all bpp values. Luckily they're all nicely spaced with 2
@@ -1707,7 +1744,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
                                lane_count <= max_lane_count;
                                lane_count <<= 1) {
 
-                               link_clock = common_rates[clock];
+                               link_clock = intel_dp->common_rates[clock];
                                link_avail = intel_dp_max_data_rate(link_clock,
                                                                    lane_count);
 
@@ -1739,7 +1776,7 @@ found:
        pipe_config->lane_count = lane_count;
 
        pipe_config->pipe_bpp = bpp;
-       pipe_config->port_clock = common_rates[clock];
+       pipe_config->port_clock = intel_dp->common_rates[clock];
 
        intel_dp_compute_rate(intel_dp, pipe_config->port_clock,
                              &link_bw, &rate_select);
@@ -3051,7 +3088,8 @@ static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp)
 {
        uint8_t psr_caps = 0;
 
-       drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps);
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1)
+               return false;
        return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED;
 }
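This and the following two hunks make the capability helpers check the drm_dp_dpcd_readb() return value (one byte transferred) rather than relying on a zero-initialized buffer when the AUX read fails. A minimal sketch of the pattern, with a hypothetical read_byte() standing in for drm_dp_dpcd_readb():

/* Illustrative sketch only, not part of this patch. */
#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical AUX accessor standing in for drm_dp_dpcd_readb():
 * returns the number of bytes transferred, or a negative error code.
 */
static int read_byte(unsigned int address, uint8_t *value)
{
        (void)address;
        *value = 0;
        return -1; /* simulate a failed AUX transaction */
}

#define SKETCH_CAP_BIT 0x01 /* stand-in for a DPCD capability bit */

static bool sketch_get_cap(unsigned int address)
{
        uint8_t caps = 0;

        /* Treat a short or failed AUX transfer as "capability absent". */
        if (read_byte(address, &caps) != 1)
                return false;

        return caps & SKETCH_CAP_BIT;
}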
 
@@ -3059,9 +3097,9 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp)
 {
        uint8_t dprx = 0;
 
-       drm_dp_dpcd_readb(&intel_dp->aux,
-                       DP_DPRX_FEATURE_ENUMERATION_LIST,
-                       &dprx);
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST,
+                             &dprx) != 1)
+               return false;
        return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED;
 }
 
@@ -3069,7 +3107,9 @@ static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp)
 {
        uint8_t alpm_caps = 0;
 
-       drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &alpm_caps);
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP,
+                             &alpm_caps) != 1)
+               return false;
        return alpm_caps & DP_ALPM_CAP;
 }
 
@@ -3642,9 +3682,10 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
                uint8_t frame_sync_cap;
 
                dev_priv->psr.sink_support = true;
-               drm_dp_dpcd_read(&intel_dp->aux,
-                                DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
-                                &frame_sync_cap, 1);
+               if (drm_dp_dpcd_readb(&intel_dp->aux,
+                                     DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
+                                     &frame_sync_cap) != 1)
+                       frame_sync_cap = 0;
                dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
                /* PSR2 needs frame sync as well */
                dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
@@ -3695,6 +3736,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
                intel_dp->num_sink_rates = i;
        }
 
+       if (intel_dp->num_sink_rates)
+               intel_dp->use_rate_select = true;
+       else
+               intel_dp_set_sink_rates(intel_dp);
+
+       intel_dp_set_common_rates(intel_dp);
+
        return true;
 }
 
@@ -3702,11 +3750,18 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 static bool
 intel_dp_get_dpcd(struct intel_dp *intel_dp)
 {
+       u8 sink_count;
+
        if (!intel_dp_read_dpcd(intel_dp))
                return false;
 
-       if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT,
-                            &intel_dp->sink_count, 1) < 0)
+       /* Don't clobber cached eDP rates. */
+       if (!is_edp(intel_dp)) {
+               intel_dp_set_sink_rates(intel_dp);
+               intel_dp_set_common_rates(intel_dp);
+       }
+
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &sink_count) <= 0)
                return false;
 
        /*
@@ -3714,7 +3769,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
         * a member variable in intel_dp will track any changes
         * between short pulse interrupts.
         */
-       intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count);
+       intel_dp->sink_count = DP_GET_SINK_COUNT(sink_count);
 
        /*
         * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that
@@ -3743,7 +3798,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 static bool
 intel_dp_can_mst(struct intel_dp *intel_dp)
 {
-       u8 buf[1];
+       u8 mstm_cap;
 
        if (!i915.enable_dp_mst)
                return false;
@@ -3754,10 +3809,10 @@ intel_dp_can_mst(struct intel_dp *intel_dp)
        if (intel_dp->dpcd[DP_DPCD_REV] < 0x12)
                return false;
 
-       if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1)
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1)
                return false;
 
-       return buf[0] & DP_MST_CAP;
+       return mstm_cap & DP_MST_CAP;
 }
 
 static void
@@ -3903,9 +3958,8 @@ stop:
 static bool
 intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector)
 {
-       return drm_dp_dpcd_read(&intel_dp->aux,
-                                      DP_DEVICE_SERVICE_IRQ_VECTOR,
-                                      sink_irq_vector, 1) == 1;
+       return drm_dp_dpcd_readb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR,
+                                sink_irq_vector) == 1;
 }
 
 static bool
@@ -3926,7 +3980,6 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 {
        int status = 0;
        int min_lane_count = 1;
-       int common_rates[DP_MAX_SUPPORTED_RATES] = {};
        int link_rate_index, test_link_rate;
        uint8_t test_lane_count, test_link_bw;
        /* (DP CTS 1.2)
@@ -3943,7 +3996,7 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
        test_lane_count &= DP_MAX_LANE_COUNT_MASK;
        /* Validate the requested lane count */
        if (test_lane_count < min_lane_count ||
-           test_lane_count > intel_dp->max_sink_lane_count)
+           test_lane_count > intel_dp->max_link_lane_count)
                return DP_TEST_NAK;
 
        status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE,
@@ -3954,9 +4007,9 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
        }
        /* Validate the requested link rate */
        test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw);
-       link_rate_index = intel_dp_link_rate_index(intel_dp,
-                                                  common_rates,
-                                                  test_link_rate);
+       link_rate_index = intel_dp_rate_index(intel_dp->common_rates,
+                                             intel_dp->num_common_rates,
+                                             test_link_rate);
        if (link_rate_index < 0)
                return DP_TEST_NAK;
 
@@ -3969,13 +4022,13 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
 {
        uint8_t test_pattern;
-       uint16_t test_misc;
+       uint8_t test_misc;
        __be16 h_width, v_height;
        int status = 0;
 
        /* Read the TEST_PATTERN (DP CTS 3.1.5) */
-       status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN,
-                                 &test_pattern, 1);
+       status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_PATTERN,
+                                  &test_pattern);
        if (status <= 0) {
                DRM_DEBUG_KMS("Test pattern read failed\n");
                return DP_TEST_NAK;
@@ -3997,8 +4050,8 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
                return DP_TEST_NAK;
        }
 
-       status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0,
-                                 &test_misc, 1);
+       status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_MISC0,
+                                  &test_misc);
        if (status <= 0) {
                DRM_DEBUG_KMS("TEST MISC read failed\n");
                return DP_TEST_NAK;
@@ -4057,10 +4110,8 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
                 */
                block += intel_connector->detect_edid->extensions;
 
-               if (!drm_dp_dpcd_write(&intel_dp->aux,
-                                       DP_TEST_EDID_CHECKSUM,
-                                       &block->checksum,
-                                       1))
+               if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_EDID_CHECKSUM,
+                                      block->checksum) <= 0)
                        DRM_DEBUG_KMS("Failed to write EDID checksum\n");
 
                test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
@@ -4224,9 +4275,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
        if (!to_intel_crtc(intel_encoder->base.crtc)->active)
                return;
 
-       /* FIXME: we need to synchronize this sort of stuff with hardware
-        * readout. Currently fast link training doesn't work on boot-up. */
-       if (!intel_dp->lane_count)
+       /*
+        * Validate the cached values of intel_dp->link_rate and
+        * intel_dp->lane_count before attempting to retrain.
+        */
+       if (!intel_dp_link_params_valid(intel_dp))
                return;
 
        /* Retrain if Channel EQ or CR not ok */
@@ -4613,11 +4666,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
                      yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
 
        if (intel_dp->reset_link_params) {
-               /* Set the max lane count for sink */
-               intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd);
+               /* Initial max link lane count */
+               intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
 
-               /* Set the max link BW for sink */
-               intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp);
+               /* Initial max link rate */
+               intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
 
                intel_dp->reset_link_params = false;
        }
@@ -5127,7 +5180,7 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port)
        return intel_bios_is_port_edp(dev_priv, port);
 }
 
-void
+static void
 intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector)
 {
        struct intel_connector *intel_connector = to_intel_connector(connector);
@@ -5932,6 +5985,29 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port)
        }
 }
 
+static void intel_dp_modeset_retry_work_fn(struct work_struct *work)
+{
+       struct intel_connector *intel_connector;
+       struct drm_connector *connector;
+
+       intel_connector = container_of(work, typeof(*intel_connector),
+                                      modeset_retry_work);
+       connector = &intel_connector->base;
+       DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
+                     connector->name);
+
+       /* Grab the locks before changing connector property*/
+       mutex_lock(&connector->dev->mode_config.mutex);
+       /* Set connector link status to BAD and send a Uevent to notify
+        * userspace to do a modeset.
+        */
+       drm_mode_connector_set_link_status_property(connector,
+                                                   DRM_MODE_LINK_STATUS_BAD);
+       mutex_unlock(&connector->dev->mode_config.mutex);
+       /* Send Hotplug uevent so userspace can reprobe */
+       drm_kms_helper_hotplug_event(connector->dev);
+}
+
 bool
 intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
                        struct intel_connector *intel_connector)
@@ -5944,11 +6020,17 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
        enum port port = intel_dig_port->port;
        int type;
 
+       /* Initialize the work for modeset in case of link train failure */
+       INIT_WORK(&intel_connector->modeset_retry_work,
+                 intel_dp_modeset_retry_work_fn);
+
        if (WARN(intel_dig_port->max_lanes < 1,
                 "Not enough lanes (%d) for DP on port %c\n",
                 intel_dig_port->max_lanes, port_name(port)))
                return false;
 
+       intel_dp_set_source_rates(intel_dp);
+
        intel_dp->reset_link_params = true;
        intel_dp->pps_pipe = INVALID_PIPE;
        intel_dp->active_pipe = INVALID_PIPE;
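The intel_dp.c hunks above replace the old per-sink bookkeeping (max_sink_lane_count, max_sink_link_bw) with three arrays kept on the intel_dp: source rates, sink rates, and their intersection (common_rates), which intel_dp_rate_index() then searches when validating a CTS-requested link rate. A minimal standalone sketch of that intersect-and-look-up flow, using invented rate tables rather than the driver's DPCD-derived ones:

#include <stdio.h>

/* Intersect two ascending rate arrays (kHz); returns the number of common rates. */
static int intersect_rates(const int *a, int na, const int *b, int nb, int *out)
{
        int i = 0, j = 0, k = 0;

        while (i < na && j < nb) {
                if (a[i] == b[j]) {
                        out[k++] = a[i];
                        i++, j++;
                } else if (a[i] < b[j]) {
                        i++;
                } else {
                        j++;
                }
        }
        return k;
}

/* Return the index of 'rate' in 'rates', or -1 if this source/sink pair cannot do it. */
static int rate_index(const int *rates, int n, int rate)
{
        for (int i = 0; i < n; i++)
                if (rates[i] == rate)
                        return i;
        return -1;
}

int main(void)
{
        /* Hypothetical tables: what the platform can drive vs. what the sink reports. */
        const int source[] = { 162000, 270000, 540000 };
        const int sink[]   = { 162000, 216000, 270000, 324000, 432000, 540000 };
        int common[8];
        int n = intersect_rates(source, 3, sink, 6, common);

        printf("common rates: %d, index of 270000: %d\n", n, rate_index(common, n, 270000));
        return 0;
}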
index 6532e226db29b63da766a8571de231de4f7261f6..a0995c00fc84ea67b330ac7516d9c4ab725c7494 100644 (file)
@@ -28,6 +28,10 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
 {
        uint8_t reg_val = 0;
 
+       /* Early return when the display uses another mechanism to enable the backlight. */
+       if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP))
+               return;
+
        if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER,
                              &reg_val) < 0) {
                DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
@@ -97,15 +101,37 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector)
 {
        struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
        uint8_t dpcd_buf = 0;
+       uint8_t edp_backlight_mode = 0;
 
-       set_aux_backlight_enable(intel_dp, true);
+       if (drm_dp_dpcd_readb(&intel_dp->aux,
+                       DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) {
+               DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
+                             DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
+               return;
+       }
+
+       edp_backlight_mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
+
+       switch (edp_backlight_mode) {
+       case DP_EDP_BACKLIGHT_CONTROL_MODE_PWM:
+       case DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET:
+       case DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT:
+               dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
+               dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
+               if (drm_dp_dpcd_writeb(&intel_dp->aux,
+                       DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf) < 0) {
+                       DRM_DEBUG_KMS("Failed to write aux backlight mode\n");
+               }
+               break;
+
+       /* Do nothing when it is already DPCD mode */
+       case DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD:
+       default:
+               break;
+       }
 
-       if ((drm_dp_dpcd_readb(&intel_dp->aux,
-                              DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) == 1) &&
-           ((dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) ==
-            DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET))
-               drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
-                                  (dpcd_buf | DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD));
+       set_aux_backlight_enable(intel_dp, true);
+       intel_dp_aux_set_backlight(connector, connector->panel.backlight.level);
 }
 
 static void intel_dp_aux_disable_backlight(struct intel_connector *connector)
@@ -143,9 +169,8 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector)
         * the panel can support backlight control over the aux channel
         */
        if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP &&
-           (intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) &&
-           !((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_PIN_ENABLE_CAP) ||
-             (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP))) {
+           (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) &&
+           !(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP)) {
                DRM_DEBUG_KMS("AUX Backlight Control Supported!\n");
                return true;
        }
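The reworked enable path reads DP_EDP_BACKLIGHT_MODE_SET_REGISTER, looks only at the control-mode field, and rewrites it to DPCD mode unless the panel is already there. The mask-and-replace step is easy to get wrong, so here is a hedged standalone sketch of just that bit manipulation (register values are simulated, not read over AUX; the mode encodings mirror the eDP DPCD definitions):

#include <stdio.h>
#include <stdint.h>

#define BACKLIGHT_CONTROL_MODE_MASK     0x3
#define BACKLIGHT_CONTROL_MODE_PWM      0x0
#define BACKLIGHT_CONTROL_MODE_PRESET   0x1
#define BACKLIGHT_CONTROL_MODE_DPCD     0x2
#define BACKLIGHT_CONTROL_MODE_PRODUCT  0x3

/* Return the value to write back, or the input unchanged if already in DPCD mode. */
static uint8_t to_dpcd_mode(uint8_t reg)
{
        if ((reg & BACKLIGHT_CONTROL_MODE_MASK) == BACKLIGHT_CONTROL_MODE_DPCD)
                return reg;                             /* nothing to do */
        reg &= ~BACKLIGHT_CONTROL_MODE_MASK;            /* clear the mode field */
        reg |= BACKLIGHT_CONTROL_MODE_DPCD;             /* select AUX/DPCD brightness control */
        return reg;
}

int main(void)
{
        uint8_t before = 0xa1;  /* arbitrary register image currently in PRESET mode */

        printf("0x%02x -> 0x%02x\n", before, to_dpcd_mode(before));
        return 0;
}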
index 0048b520baf7c7bf210c2287b3d5c5f0139ceb52..b79c1c0e404cc3f2eb385115e06a3dc4c7176073 100644 (file)
@@ -146,7 +146,8 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
                link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
        drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
 
-       if (intel_dp->num_sink_rates)
+       /* eDP 1.4 rate select method. */
+       if (!link_bw)
                drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
                                  &rate_select, 1);
 
@@ -313,6 +314,24 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp)
 void
 intel_dp_start_link_train(struct intel_dp *intel_dp)
 {
-       intel_dp_link_training_clock_recovery(intel_dp);
-       intel_dp_link_training_channel_equalization(intel_dp);
+       struct intel_connector *intel_connector = intel_dp->attached_connector;
+
+       if (!intel_dp_link_training_clock_recovery(intel_dp))
+               goto failure_handling;
+       if (!intel_dp_link_training_channel_equalization(intel_dp))
+               goto failure_handling;
+
+       DRM_DEBUG_KMS("Link Training Passed at Link Rate = %d, Lane count = %d",
+                     intel_dp->link_rate, intel_dp->lane_count);
+       return;
+
+ failure_handling:
+       DRM_DEBUG_KMS("Link Training failed at link rate = %d, lane count = %d",
+                     intel_dp->link_rate, intel_dp->lane_count);
+       if (!intel_dp_get_link_train_fallback_values(intel_dp,
+                                                    intel_dp->link_rate,
+                                                    intel_dp->lane_count))
+               /* Schedule a Hotplug Uevent to userspace to start modeset */
+               schedule_work(&intel_connector->modeset_retry_work);
+       return;
 }
index 1dee9933005fdbc9860907d5a248d7063de1e482..3715386e427270bc7409224cd2312c7de9a28896 100644 (file)
@@ -56,7 +56,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
         * for MST we always configure max link bw - the spec doesn't
         * seem to suggest we should do otherwise.
         */
-       lane_count = drm_dp_max_lane_count(intel_dp->dpcd);
+       lane_count = intel_dp_max_lane_count(intel_dp);
+
        pipe_config->lane_count = lane_count;
 
        pipe_config->pipe_bpp = bpp;
@@ -329,14 +330,6 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force)
        return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port);
 }
 
-static int
-intel_dp_mst_set_property(struct drm_connector *connector,
-                         struct drm_property *property,
-                         uint64_t val)
-{
-       return 0;
-}
-
 static void
 intel_dp_mst_connector_destroy(struct drm_connector *connector)
 {
@@ -353,8 +346,7 @@ static const struct drm_connector_funcs intel_dp_mst_connector_funcs = {
        .dpms = drm_atomic_helper_connector_dpms,
        .detect = intel_dp_mst_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
-       .set_property = intel_dp_mst_set_property,
-       .atomic_get_property = intel_connector_atomic_get_property,
+       .set_property = drm_atomic_helper_connector_set_property,
        .late_register = intel_connector_register,
        .early_unregister = intel_connector_unregister,
        .destroy = intel_dp_mst_connector_destroy,
@@ -378,7 +370,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
        int max_rate, mode_rate, max_lanes, max_link_clock;
 
        max_link_clock = intel_dp_max_link_rate(intel_dp);
-       max_lanes = drm_dp_max_lane_count(intel_dp->dpcd);
+       max_lanes = intel_dp_max_lane_count(intel_dp);
 
        max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes);
        mode_rate = intel_dp_link_required(mode->clock, bpp);
@@ -495,7 +487,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
                drm_mode_connector_attach_encoder(&intel_connector->base,
                                                  &intel_dp->mst_encoders[i]->base.base);
        }
-       intel_dp_add_properties(intel_dp, connector);
 
        drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0);
        drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0);
index 48ea8d9d49fe27960ff4b8447fe3c6aa1a48250b..bd500977b3fc63bb02053b3ed7efcb037c0c06e4 100644 (file)
@@ -88,7 +88,6 @@
        int cpu, ret, timeout = (US) * 1000; \
        u64 base; \
        _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
-       BUILD_BUG_ON((US) > 50000); \
        if (!(ATOMIC)) { \
                preempt_disable(); \
                cpu = smp_processor_id(); \
        ret__; \
 })
 
-#define wait_for_atomic(COND, MS)      _wait_for_atomic((COND), (MS) * 1000, 1)
-#define wait_for_atomic_us(COND, US)   _wait_for_atomic((COND), (US), 1)
+#define wait_for_atomic_us(COND, US) \
+({ \
+       BUILD_BUG_ON(!__builtin_constant_p(US)); \
+       BUILD_BUG_ON((US) > 50000); \
+       _wait_for_atomic((COND), (US), 1); \
+})
+
+#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000)
 
 #define KHz(x) (1000 * (x))
 #define MHz(x) KHz(1000 * (x))
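Moving the BUILD_BUG_ON() out of _wait_for_atomic() lets the inner macro be reused with computed timeouts, while wait_for_atomic_us() keeps rejecting over-long (and, via __builtin_constant_p, non-constant) timeouts at compile time. A tiny userspace illustration of the same idiom; BUILD_BUG_ON is re-created here with the negative-array trick since the kernel header is not available:

#include <stdio.h>

/* Minimal stand-in for the kernel's BUILD_BUG_ON(): a negative array size
 * makes the compiler reject the build when cond is true at compile time. */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

#define WAIT_US_MAX 50000

/* Like the reworked wait_for_atomic_us(): the timeout must be a compile-time
 * constant (the real macro also asserts __builtin_constant_p) so the
 * upper-bound check below can be evaluated by the compiler. */
#define checked_wait_us(us) ({                  \
        BUILD_BUG_ON((us) > WAIT_US_MAX);       \
        raw_wait_us(us);                        \
})

static int raw_wait_us(unsigned int us)
{
        /* Placeholder for the real busy-wait; just report what was asked for. */
        printf("would spin for %u us\n", us);
        return 0;
}

int main(void)
{
        checked_wait_us(10);            /* fine: constant and within the limit */
        /* checked_wait_us(60000);         rejected at compile time: > WAIT_US_MAX */
        return 0;
}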
@@ -321,6 +326,9 @@ struct intel_connector {
        void *port; /* store this opaque as its illegal to dereference it */
 
        struct intel_dp *mst_port;
+
+       /* Work struct to schedule a uevent on link train failure */
+       struct work_struct modeset_retry_work;
 };
 
 struct dpll {
@@ -504,8 +512,8 @@ enum vlv_wm_level {
 };
 
 struct vlv_wm_state {
-       struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS];
-       struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS];
+       struct g4x_pipe_wm wm[NUM_VLV_WM_LEVELS];
+       struct g4x_sr_wm sr[NUM_VLV_WM_LEVELS];
        uint8_t num_levels;
        bool cxsr;
 };
@@ -514,6 +522,22 @@ struct vlv_fifo_state {
        u16 plane[I915_MAX_PLANES];
 };
 
+enum g4x_wm_level {
+       G4X_WM_LEVEL_NORMAL,
+       G4X_WM_LEVEL_SR,
+       G4X_WM_LEVEL_HPLL,
+       NUM_G4X_WM_LEVELS,
+};
+
+struct g4x_wm_state {
+       struct g4x_pipe_wm wm;
+       struct g4x_sr_wm sr;
+       struct g4x_sr_wm hpll;
+       bool cxsr;
+       bool hpll_en;
+       bool fbc_en;
+};
+
 struct intel_crtc_wm_state {
        union {
                struct {
@@ -541,7 +565,7 @@ struct intel_crtc_wm_state {
 
                struct {
                        /* "raw" watermarks (not inverted) */
-                       struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS];
+                       struct g4x_pipe_wm raw[NUM_VLV_WM_LEVELS];
                        /* intermediate watermarks (inverted) */
                        struct vlv_wm_state intermediate;
                        /* optimal watermarks (inverted) */
@@ -549,6 +573,15 @@ struct intel_crtc_wm_state {
                        /* display FIFO split */
                        struct vlv_fifo_state fifo_state;
                } vlv;
+
+               struct {
+                       /* "raw" watermarks */
+                       struct g4x_pipe_wm raw[NUM_G4X_WM_LEVELS];
+                       /* intermediate watermarks */
+                       struct g4x_wm_state intermediate;
+                       /* optimal watermarks */
+                       struct g4x_wm_state optimal;
+               } g4x;
        };
 
        /*
@@ -766,11 +799,6 @@ struct intel_crtc {
        int adjusted_x;
        int adjusted_y;
 
-       uint32_t cursor_addr;
-       uint32_t cursor_cntl;
-       uint32_t cursor_size;
-       uint32_t cursor_base;
-
        struct intel_crtc_state *config;
 
        /* global reset count when the last flip was submitted */
@@ -786,6 +814,7 @@ struct intel_crtc {
                union {
                        struct intel_pipe_wm ilk;
                        struct vlv_wm_state vlv;
+                       struct g4x_wm_state g4x;
                } active;
        } wm;
 
@@ -811,18 +840,22 @@ struct intel_plane {
        int max_downscale;
        uint32_t frontbuffer_bit;
 
+       struct {
+               u32 base, cntl, size;
+       } cursor;
+
        /*
         * NOTE: Do not place new plane state fields here (e.g., when adding
         * new plane properties).  New runtime state should now be placed in
         * the intel_plane_state structure and accessed via plane_state.
         */
 
-       void (*update_plane)(struct drm_plane *plane,
+       void (*update_plane)(struct intel_plane *plane,
                             const struct intel_crtc_state *crtc_state,
                             const struct intel_plane_state *plane_state);
-       void (*disable_plane)(struct drm_plane *plane,
-                             struct drm_crtc *crtc);
-       int (*check_plane)(struct drm_plane *plane,
+       void (*disable_plane)(struct intel_plane *plane,
+                             struct intel_crtc *crtc);
+       int (*check_plane)(struct intel_plane *plane,
                           struct intel_crtc_state *crtc_state,
                           struct intel_plane_state *state);
 };
@@ -948,13 +981,20 @@ struct intel_dp {
        uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE];
        uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
        uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE];
-       /* sink rates as reported by DP_SUPPORTED_LINK_RATES */
-       uint8_t num_sink_rates;
+       /* source rates */
+       int num_source_rates;
+       const int *source_rates;
+       /* sink rates as reported by DP_MAX_LINK_RATE/DP_SUPPORTED_LINK_RATES */
+       int num_sink_rates;
        int sink_rates[DP_MAX_SUPPORTED_RATES];
-       /* Max lane count for the sink as per DPCD registers */
-       uint8_t max_sink_lane_count;
-       /* Max link BW for the sink as per DPCD registers */
-       int max_sink_link_bw;
+       bool use_rate_select;
+       /* intersection of source and sink rates */
+       int num_common_rates;
+       int common_rates[DP_MAX_SUPPORTED_RATES];
+       /* Max lane count for the current link */
+       int max_link_lane_count;
+       /* Max rate for the current link */
+       int max_link_rate;
        /* sink or branch descriptor */
        struct intel_dp_desc desc;
        struct drm_dp_aux aux;
@@ -1491,10 +1531,10 @@ void intel_edp_backlight_off(struct intel_dp *intel_dp);
 void intel_edp_panel_vdd_on(struct intel_dp *intel_dp);
 void intel_edp_panel_on(struct intel_dp *intel_dp);
 void intel_edp_panel_off(struct intel_dp *intel_dp);
-void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector);
 void intel_dp_mst_suspend(struct drm_device *dev);
 void intel_dp_mst_resume(struct drm_device *dev);
 int intel_dp_max_link_rate(struct intel_dp *intel_dp);
+int intel_dp_max_lane_count(struct intel_dp *intel_dp);
 int intel_dp_rate_select(struct intel_dp *intel_dp, int rate);
 void intel_dp_hot_plug(struct intel_encoder *intel_encoder);
 void intel_power_sequencer_reset(struct drm_i915_private *dev_priv);
@@ -1825,6 +1865,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv,
                    struct intel_rps_client *rps,
                    unsigned long submitted);
 void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
+void g4x_wm_get_hw_state(struct drm_device *dev);
 void vlv_wm_get_hw_state(struct drm_device *dev);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
@@ -1832,6 +1873,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
                          struct skl_ddb_allocation *ddb /* out */);
 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
                              struct skl_pipe_wm *out);
+void g4x_wm_sanitize(struct drm_i915_private *dev_priv);
 void vlv_wm_sanitize(struct drm_i915_private *dev_priv);
 bool intel_can_enable_sagv(struct drm_atomic_state *state);
 int intel_enable_sagv(struct drm_i915_private *dev_priv);
index 3ffe8b1f1d486f5e7352f50a62091cbb60831c83..fc0ef492252ac7c93f7b7ccb4e6a4b95d30f6294 100644 (file)
@@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder)
                val |= (ULPS_STATE_ENTER | DEVICE_READY);
                I915_WRITE(MIPI_DEVICE_READY(port), val);
 
-               /* Wait for ULPS Not active */
+               /* Wait for ULPS active */
                if (intel_wait_for_register(dev_priv,
-                               MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE,
-                               GLK_ULPS_NOT_ACTIVE, 20))
-                       DRM_ERROR("ULPS is still active\n");
+                               MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20))
+                       DRM_ERROR("ULPS not active\n");
 
                /* Exit ULPS */
                val = I915_READ(MIPI_DEVICE_READY(port));
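The GLK fix above flips the expected value passed to intel_wait_for_register(): instead of waiting for GLK_ULPS_NOT_ACTIVE to be set, the code now waits for it to clear (value 0 under that mask), i.e. for ULPS to actually become active before exiting it. The helper's contract is essentially "poll until (read(reg) & mask) == value, or time out"; a hedged standalone model of that contract with a simulated register:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Simulated MMIO read: bit 0 models a NOT_ACTIVE flag that clears after a few polls. */
static uint32_t fake_read(int *polls)
{
        return (--(*polls) > 0) ? 0x1 : 0x0;
}

/* Poll until (reg & mask) == value or the poll budget runs out; 0 on success. */
static int wait_for_register(int *polls, uint32_t mask, uint32_t value, int budget)
{
        while (budget--) {
                if ((fake_read(polls) & mask) == value)
                        return 0;
        }
        return -1;      /* timed out */
}

int main(void)
{
        int polls = 5;
        /* The old check waited for the bit to be set; the fix waits for it to clear. */
        bool ok = wait_for_register(&polls, 0x1, 0x0, 20) == 0;

        printf("ULPS %s\n", ok ? "active" : "still not active");
        return 0;
}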
index 0dce7792643abb414055fe262de09298e5d485e3..7158c7ce9c0941a05654261e2443bac8b2315b08 100644 (file)
@@ -694,8 +694,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
                                                clk_zero_cnt << 8 | prepare_cnt;
 
        /*
-        * LP to HS switch count = 4TLPX + PREP_COUNT * 2 + EXIT_ZERO_COUNT * 2
-        *                                      + 10UI + Extra Byte Count
+        * LP to HS switch count = 4TLPX + PREP_COUNT * mul + EXIT_ZERO_COUNT *
+        *                                      mul + 10UI + Extra Byte Count
         *
         * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count
         * Extra Byte Count is calculated according to number of lanes.
@@ -708,8 +708,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
        /* B044 */
        /* FIXME:
         * The comment above does not match with the code */
-       lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * 2 +
-                                               exit_zero_cnt * 2 + 10, 8);
+       lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul +
+                                               exit_zero_cnt * mul + 10, 8);
 
        hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8);
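With this change both the comment and the computation scale the prepare and exit-zero terms by the same 'mul' factor before rounding up to byte clocks. As a sanity check, a standalone evaluation of the two expressions with invented counts (DIV_ROUND_UP re-created locally):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        /* All values are invented purely to exercise the arithmetic. */
        int tlpx_ui = 12, prepare_cnt = 5, exit_zero_cnt = 7, mul = 2, ths_trail = 40;

        int lp_to_hs = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul +
                                    exit_zero_cnt * mul + 10, 8);
        int hs_to_lp = DIV_ROUND_UP(ths_trail + 2 * tlpx_ui, 8);

        printf("LP->HS switch count: %d byte clocks\n", lp_to_hs);  /* ceil(82/8) = 11 */
        printf("HS->LP switch count: %d byte clocks\n", hs_to_lp);  /* 64/8 = 8 */
        return 0;
}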
 
index 6025839ed3b7b1140994848feb0cbb57d135d10a..c1544a53095ddb353430f9468b2e644dff4ea673 100644 (file)
@@ -350,7 +350,7 @@ static const struct drm_connector_funcs intel_dvo_connector_funcs = {
        .early_unregister = intel_connector_unregister,
        .destroy = intel_dvo_destroy,
        .fill_modes = drm_helper_probe_single_connector_modes,
-       .atomic_get_property = intel_connector_atomic_get_property,
+       .set_property = drm_atomic_helper_connector_set_property,
        .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
        .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 };
index 854e8e0c836bd2099c1cfcb72e12e3ec5ff21915..413bfd8d4bf489ec73d4ce7677a223bf4465dca3 100644 (file)
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
-static const struct engine_info {
+/* Haswell does have the CXT_SIZE register however it does not appear to be
+ * valid. Now, docs explain in dwords what is in the context object. The full
+ * size is 70720 bytes, however, the power context and execlist context will
+ * never be saved (power context is stored elsewhere, and execlists don't work
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
+ */
+#define HSW_CXT_TOTAL_SIZE             (17 * PAGE_SIZE)
+/* Same as Haswell, but 72064 bytes now. */
+#define GEN8_CXT_TOTAL_SIZE            (18 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_RENDER_SIZE    (20 * PAGE_SIZE)
+#define GEN9_LR_CONTEXT_RENDER_SIZE    (22 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_OTHER_SIZE     ( 2 * PAGE_SIZE)
+
+struct engine_class_info {
        const char *name;
-       unsigned int exec_id;
+       int (*init_legacy)(struct intel_engine_cs *engine);
+       int (*init_execlists)(struct intel_engine_cs *engine);
+};
+
+static const struct engine_class_info intel_engine_classes[] = {
+       [RENDER_CLASS] = {
+               .name = "rcs",
+               .init_execlists = logical_render_ring_init,
+               .init_legacy = intel_init_render_ring_buffer,
+       },
+       [COPY_ENGINE_CLASS] = {
+               .name = "bcs",
+               .init_execlists = logical_xcs_ring_init,
+               .init_legacy = intel_init_blt_ring_buffer,
+       },
+       [VIDEO_DECODE_CLASS] = {
+               .name = "vcs",
+               .init_execlists = logical_xcs_ring_init,
+               .init_legacy = intel_init_bsd_ring_buffer,
+       },
+       [VIDEO_ENHANCEMENT_CLASS] = {
+               .name = "vecs",
+               .init_execlists = logical_xcs_ring_init,
+               .init_legacy = intel_init_vebox_ring_buffer,
+       },
+};
+
+struct engine_info {
        unsigned int hw_id;
+       unsigned int uabi_id;
+       u8 class;
+       u8 instance;
        u32 mmio_base;
        unsigned irq_shift;
-       int (*init_legacy)(struct intel_engine_cs *engine);
-       int (*init_execlists)(struct intel_engine_cs *engine);
-} intel_engines[] = {
+};
+
+static const struct engine_info intel_engines[] = {
        [RCS] = {
-               .name = "rcs",
                .hw_id = RCS_HW,
-               .exec_id = I915_EXEC_RENDER,
+               .uabi_id = I915_EXEC_RENDER,
+               .class = RENDER_CLASS,
+               .instance = 0,
                .mmio_base = RENDER_RING_BASE,
                .irq_shift = GEN8_RCS_IRQ_SHIFT,
-               .init_execlists = logical_render_ring_init,
-               .init_legacy = intel_init_render_ring_buffer,
        },
        [BCS] = {
-               .name = "bcs",
                .hw_id = BCS_HW,
-               .exec_id = I915_EXEC_BLT,
+               .uabi_id = I915_EXEC_BLT,
+               .class = COPY_ENGINE_CLASS,
+               .instance = 0,
                .mmio_base = BLT_RING_BASE,
                .irq_shift = GEN8_BCS_IRQ_SHIFT,
-               .init_execlists = logical_xcs_ring_init,
-               .init_legacy = intel_init_blt_ring_buffer,
        },
        [VCS] = {
-               .name = "vcs",
                .hw_id = VCS_HW,
-               .exec_id = I915_EXEC_BSD,
+               .uabi_id = I915_EXEC_BSD,
+               .class = VIDEO_DECODE_CLASS,
+               .instance = 0,
                .mmio_base = GEN6_BSD_RING_BASE,
                .irq_shift = GEN8_VCS1_IRQ_SHIFT,
-               .init_execlists = logical_xcs_ring_init,
-               .init_legacy = intel_init_bsd_ring_buffer,
        },
        [VCS2] = {
-               .name = "vcs2",
                .hw_id = VCS2_HW,
-               .exec_id = I915_EXEC_BSD,
+               .uabi_id = I915_EXEC_BSD,
+               .class = VIDEO_DECODE_CLASS,
+               .instance = 1,
                .mmio_base = GEN8_BSD2_RING_BASE,
                .irq_shift = GEN8_VCS2_IRQ_SHIFT,
-               .init_execlists = logical_xcs_ring_init,
-               .init_legacy = intel_init_bsd2_ring_buffer,
        },
        [VECS] = {
-               .name = "vecs",
                .hw_id = VECS_HW,
-               .exec_id = I915_EXEC_VEBOX,
+               .uabi_id = I915_EXEC_VEBOX,
+               .class = VIDEO_ENHANCEMENT_CLASS,
+               .instance = 0,
                .mmio_base = VEBOX_RING_BASE,
                .irq_shift = GEN8_VECS_IRQ_SHIFT,
-               .init_execlists = logical_xcs_ring_init,
-               .init_legacy = intel_init_vebox_ring_buffer,
        },
 };
 
+/**
+ * ___intel_engine_context_size() - return the size of the context for an engine
+ * @dev_priv: i915 device private
+ * @class: engine class
+ *
+ * Each engine class may require a different amount of space for a context
+ * image.
+ *
+ * Return: size (in bytes) of an engine class specific context image
+ *
+ * Note: this size includes the HWSP, which is part of the context image
+ * in LRC mode, but does not include the "shared data page" used with
+ * GuC submission. The caller should account for this if using the GuC.
+ */
+static u32
+__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+{
+       u32 cxt_size;
+
+       BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
+
+       switch (class) {
+       case RENDER_CLASS:
+               switch (INTEL_GEN(dev_priv)) {
+               default:
+                       MISSING_CASE(INTEL_GEN(dev_priv));
+               case 9:
+                       return GEN9_LR_CONTEXT_RENDER_SIZE;
+               case 8:
+                       return i915.enable_execlists ?
+                              GEN8_LR_CONTEXT_RENDER_SIZE :
+                              GEN8_CXT_TOTAL_SIZE;
+               case 7:
+                       if (IS_HASWELL(dev_priv))
+                               return HSW_CXT_TOTAL_SIZE;
+
+                       cxt_size = I915_READ(GEN7_CXT_SIZE);
+                       return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
+                                       PAGE_SIZE);
+               case 6:
+                       cxt_size = I915_READ(CXT_SIZE);
+                       return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
+                                       PAGE_SIZE);
+               case 5:
+               case 4:
+               case 3:
+               case 2:
+               /* For the special day when i810 gets merged. */
+               case 1:
+                       return 0;
+               }
+               break;
+       default:
+               MISSING_CASE(class);
+       case VIDEO_DECODE_CLASS:
+       case VIDEO_ENHANCEMENT_CLASS:
+       case COPY_ENGINE_CLASS:
+               if (INTEL_GEN(dev_priv) < 8)
+                       return 0;
+               return GEN8_LR_CONTEXT_OTHER_SIZE;
+       }
+}
+
 static int
 intel_engine_setup(struct drm_i915_private *dev_priv,
                   enum intel_engine_id id)
 {
        const struct engine_info *info = &intel_engines[id];
+       const struct engine_class_info *class_info;
        struct intel_engine_cs *engine;
 
+       GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
+       class_info = &intel_engine_classes[info->class];
+
        GEM_BUG_ON(dev_priv->engine[id]);
        engine = kzalloc(sizeof(*engine), GFP_KERNEL);
        if (!engine)
@@ -96,11 +204,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 
        engine->id = id;
        engine->i915 = dev_priv;
-       engine->name = info->name;
-       engine->exec_id = info->exec_id;
+       WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
+                        class_info->name, info->instance) >=
+               sizeof(engine->name));
+       engine->uabi_id = info->uabi_id;
        engine->hw_id = engine->guc_id = info->hw_id;
        engine->mmio_base = info->mmio_base;
        engine->irq_shift = info->irq_shift;
+       engine->class = info->class;
+       engine->instance = info->instance;
+
+       engine->context_size = __intel_engine_context_size(dev_priv,
+                                                          engine->class);
+       if (WARN_ON(engine->context_size > BIT(20)))
+               engine->context_size = 0;
 
        /* Nothing to do here, execute in order of dependencies */
        engine->schedule = NULL;
@@ -112,18 +229,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 }
 
 /**
- * intel_engines_init_early() - allocate the Engine Command Streamers
+ * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init_early(struct drm_i915_private *dev_priv)
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 {
        struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
-       unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
-       unsigned int mask = 0;
+       const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
+       unsigned int mask = 0;
        unsigned int i;
        int err;
 
@@ -150,6 +267,12 @@ int intel_engines_init_early(struct drm_i915_private *dev_priv)
        if (WARN_ON(mask != ring_mask))
                device_info->ring_mask = mask;
 
+       /* We always presume we have at least RCS available for later probing */
+       if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
+               err = -ENODEV;
+               goto cleanup;
+       }
+
        device_info->num_rings = hweight32(mask);
 
        return 0;
@@ -161,7 +284,7 @@ cleanup:
 }
 
 /**
- * intel_engines_init() - allocate, populate and init the Engine Command Streamers
+ * intel_engines_init() - init the Engine Command Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
@@ -175,12 +298,14 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
        int err = 0;
 
        for_each_engine(engine, dev_priv, id) {
+               const struct engine_class_info *class_info =
+                       &intel_engine_classes[engine->class];
                int (*init)(struct intel_engine_cs *engine);
 
                if (i915.enable_execlists)
-                       init = intel_engines[id].init_execlists;
+                       init = class_info->init_execlists;
                else
-                       init = intel_engines[id].init_legacy;
+                       init = class_info->init_legacy;
                if (!init) {
                        kfree(engine);
                        dev_priv->engine[id] = NULL;
@@ -223,6 +348,9 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
        struct drm_i915_private *dev_priv = engine->i915;
 
+       GEM_BUG_ON(!intel_engine_is_idle(engine));
+       GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
+
        /* Our semaphore implementation is strictly monotonic (i.e. we proceed
         * so long as the semaphore value in the register/page is greater
         * than the sync value), so whenever we reset the seqno,
@@ -253,13 +381,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
        intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
        clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
-       GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
-       engine->hangcheck.seqno = seqno;
-
        /* After manually advancing the seqno, fake the interrupt in case
         * there are any waiters for that seqno.
         */
        intel_engine_wakeup(engine);
+
+       GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
 }
 
 static void intel_engine_init_timeline(struct intel_engine_cs *engine)
@@ -342,6 +469,7 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
  */
 int intel_engine_init_common(struct intel_engine_cs *engine)
 {
+       struct intel_ring *ring;
        int ret;
 
        engine->set_default_submission(engine);
@@ -353,9 +481,9 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
         * be available. To avoid this we always pin the default
         * context.
         */
-       ret = engine->context_pin(engine, engine->i915->kernel_context);
-       if (ret)
-               return ret;
+       ring = engine->context_pin(engine, engine->i915->kernel_context);
+       if (IS_ERR(ring))
+               return PTR_ERR(ring);
 
        ret = intel_engine_init_breadcrumbs(engine);
        if (ret)
@@ -723,8 +851,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
                 */
        }
 
+       /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+                         GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);
 
        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */
@@ -1086,17 +1216,24 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
 
+       /* More white lies, if wedged, hw state is inconsistent */
+       if (i915_terminally_wedged(&dev_priv->gpu_error))
+               return true;
+
        /* Any inflight/incomplete requests? */
        if (!i915_seqno_passed(intel_engine_get_seqno(engine),
                               intel_engine_last_submit(engine)))
                return false;
 
+       if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
+               return true;
+
        /* Interrupt/tasklet pending? */
        if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
                return false;
 
        /* Both ports drained, no more ELSP submission? */
-       if (engine->execlist_port[0].request)
+       if (port_request(&engine->execlist_port[0]))
                return false;
 
        /* Ring stopped? */
@@ -1137,6 +1274,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
                engine->set_default_submission(engine);
 }
 
+void intel_engines_mark_idle(struct drm_i915_private *i915)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       for_each_engine(engine, i915, id) {
+               intel_engine_disarm_breadcrumbs(engine);
+               i915_gem_batch_pool_fini(&engine->batch_pool);
+               engine->no_priolist = false;
+       }
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
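With engines now described by a (class, instance) pair, the human-readable name is built at setup time by formatting the class name plus the instance number, so the second video decode engine becomes "vcs1" instead of carrying the old hard-coded "vcs2" string. A small standalone sketch of that naming step with a toy engine table:

#include <stdio.h>

struct engine_info {
        const char *class_name;
        unsigned int instance;
};

int main(void)
{
        /* Toy table mirroring the class/instance split; names are derived, not stored. */
        const struct engine_info engines[] = {
                { "rcs",  0 },
                { "bcs",  0 },
                { "vcs",  0 },
                { "vcs",  1 },          /* formerly named "vcs2" */
                { "vecs", 0 },
        };
        char name[16];

        for (unsigned int i = 0; i < sizeof(engines) / sizeof(engines[0]); i++) {
                snprintf(name, sizeof(name), "%s%u",
                         engines[i].class_name, engines[i].instance);
                printf("engine %u: %s\n", i, name);
        }
        return 0;
}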
index db7f8f0a1f36521dbb30dbe1019637230898df60..ff2fc5bc4af479b9c50855f47d8f55719865c007 100644 (file)
@@ -1312,14 +1312,12 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
 
 static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
 {
-#ifdef CONFIG_INTEL_IOMMU
        /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
-       if (intel_iommu_gfx_mapped &&
+       if (intel_vtd_active() &&
            (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
                DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
                return true;
        }
-#endif
 
        return false;
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
new file mode 100644 (file)
index 0000000..c4cbec1
--- /dev/null
@@ -0,0 +1,461 @@
+/*
+ * Copyright © 2016-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "i915_drv.h"
+#include "intel_guc_ct.h"
+
+enum { CTB_SEND = 0, CTB_RECV = 1 };
+
+enum { CTB_OWNER_HOST = 0 };
+
+void intel_guc_ct_init_early(struct intel_guc_ct *ct)
+{
+       /* we're using static channel owners */
+       ct->host_channel.owner = CTB_OWNER_HOST;
+}
+
+static inline const char *guc_ct_buffer_type_to_str(u32 type)
+{
+       switch (type) {
+       case INTEL_GUC_CT_BUFFER_TYPE_SEND:
+               return "SEND";
+       case INTEL_GUC_CT_BUFFER_TYPE_RECV:
+               return "RECV";
+       default:
+               return "<invalid>";
+       }
+}
+
+static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
+                                   u32 cmds_addr, u32 size, u32 owner)
+{
+       DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
+                        desc, cmds_addr, size, owner);
+       memset(desc, 0, sizeof(*desc));
+       desc->addr = cmds_addr;
+       desc->size = size;
+       desc->owner = owner;
+}
+
+static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
+{
+       DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
+                        desc, desc->head, desc->tail);
+       desc->head = 0;
+       desc->tail = 0;
+       desc->is_in_error = 0;
+}
+
+static int guc_action_register_ct_buffer(struct intel_guc *guc,
+                                        u32 desc_addr,
+                                        u32 type)
+{
+       u32 action[] = {
+               INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
+               desc_addr,
+               sizeof(struct guc_ct_buffer_desc),
+               type
+       };
+       int err;
+
+       /* Can't use generic send(), CT registration must go over MMIO */
+       err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+       if (err)
+               DRM_ERROR("CT: register %s buffer failed; err=%d\n",
+                         guc_ct_buffer_type_to_str(type), err);
+       return err;
+}
+
+static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
+                                          u32 owner,
+                                          u32 type)
+{
+       u32 action[] = {
+               INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
+               owner,
+               type
+       };
+       int err;
+
+       /* Can't use generic send(), CT deregistration must go over MMIO */
+       err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+       if (err)
+               DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n",
+                         guc_ct_buffer_type_to_str(type), owner, err);
+       return err;
+}
+
+static bool ctch_is_open(struct intel_guc_ct_channel *ctch)
+{
+       return ctch->vma != NULL;
+}
+
+static int ctch_init(struct intel_guc *guc,
+                    struct intel_guc_ct_channel *ctch)
+{
+       struct i915_vma *vma;
+       void *blob;
+       int err;
+       int i;
+
+       GEM_BUG_ON(ctch->vma);
+
+       /* We allocate 1 page to hold both descriptors and both buffers.
+        *       ___________.....................
+        *      |desc (SEND)|                   :
+        *      |___________|                   PAGE/4
+        *      :___________....................:
+        *      |desc (RECV)|                   :
+        *      |___________|                   PAGE/4
+        *      :_______________________________:
+        *      |cmds (SEND)                    |
+        *      |                               PAGE/4
+        *      |_______________________________|
+        *      |cmds (RECV)                    |
+        *      |                               PAGE/4
+        *      |_______________________________|
+        *
+        * Each message can use a maximum of 32 dwords and we don't expect to
+        * have more than 1 in flight at any time, so we have enough space.
+        * Some logic further ahead will rely on the fact that there is only 1
+        * page and that it is always mapped, so if the size is changed the
+        * other code will need updating as well.
+        */
+
+       /* allocate vma */
+       vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto err_out;
+       }
+       ctch->vma = vma;
+
+       /* map first page */
+       blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+       if (IS_ERR(blob)) {
+               err = PTR_ERR(blob);
+               goto err_vma;
+       }
+       DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma));
+
+       /* store pointers to desc and cmds */
+       for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+               GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+               ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
+               ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
+       }
+
+       return 0;
+
+err_vma:
+       i915_vma_unpin_and_release(&ctch->vma);
+err_out:
+       DRM_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n",
+                        ctch->owner, err);
+       return err;
+}
+
+static void ctch_fini(struct intel_guc *guc,
+                     struct intel_guc_ct_channel *ctch)
+{
+       GEM_BUG_ON(!ctch->vma);
+
+       i915_gem_object_unpin_map(ctch->vma->obj);
+       i915_vma_unpin_and_release(&ctch->vma);
+}
+
+static int ctch_open(struct intel_guc *guc,
+                    struct intel_guc_ct_channel *ctch)
+{
+       u32 base;
+       int err;
+       int i;
+
+       DRM_DEBUG_DRIVER("CT: channel %d reopen=%s\n",
+                        ctch->owner, yesno(ctch_is_open(ctch)));
+
+       if (!ctch->vma) {
+               err = ctch_init(guc, ctch);
+               if (unlikely(err))
+                       goto err_out;
+       }
+
+       /* vma should already be allocated and mapped */
+       base = guc_ggtt_offset(ctch->vma);
+
+       /* (re)initialize descriptors
+        * cmds buffers are in the second half of the blob page
+        */
+       for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+               GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+               guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
+                                       base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
+                                       PAGE_SIZE/4,
+                                       ctch->owner);
+       }
+
+       /* register buffers, starting with the RECV buffer
+        * descriptors are in first half of the blob
+        */
+       err = guc_action_register_ct_buffer(guc,
+                                           base + PAGE_SIZE/4 * CTB_RECV,
+                                           INTEL_GUC_CT_BUFFER_TYPE_RECV);
+       if (unlikely(err))
+               goto err_fini;
+
+       err = guc_action_register_ct_buffer(guc,
+                                           base + PAGE_SIZE/4 * CTB_SEND,
+                                           INTEL_GUC_CT_BUFFER_TYPE_SEND);
+       if (unlikely(err))
+               goto err_deregister;
+
+       return 0;
+
+err_deregister:
+       guc_action_deregister_ct_buffer(guc,
+                                       ctch->owner,
+                                       INTEL_GUC_CT_BUFFER_TYPE_RECV);
+err_fini:
+       ctch_fini(guc, ctch);
+err_out:
+       DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err);
+       return err;
+}
+
+static void ctch_close(struct intel_guc *guc,
+                      struct intel_guc_ct_channel *ctch)
+{
+       GEM_BUG_ON(!ctch_is_open(ctch));
+
+       guc_action_deregister_ct_buffer(guc,
+                                       ctch->owner,
+                                       INTEL_GUC_CT_BUFFER_TYPE_SEND);
+       guc_action_deregister_ct_buffer(guc,
+                                       ctch->owner,
+                                       INTEL_GUC_CT_BUFFER_TYPE_RECV);
+       ctch_fini(guc, ctch);
+}
+
+static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
+{
+       /* For now it's trivial */
+       return ++ctch->next_fence;
+}
+
+static int ctb_write(struct intel_guc_ct_buffer *ctb,
+                    const u32 *action,
+                    u32 len /* in dwords */,
+                    u32 fence)
+{
+       struct guc_ct_buffer_desc *desc = ctb->desc;
+       u32 head = desc->head / 4;      /* in dwords */
+       u32 tail = desc->tail / 4;      /* in dwords */
+       u32 size = desc->size / 4;      /* in dwords */
+       u32 used;                       /* in dwords */
+       u32 header;
+       u32 *cmds = ctb->cmds;
+       unsigned int i;
+
+       GEM_BUG_ON(desc->size % 4);
+       GEM_BUG_ON(desc->head % 4);
+       GEM_BUG_ON(desc->tail % 4);
+       GEM_BUG_ON(tail >= size);
+
+       /*
+        * tail == head condition indicates empty. GuC FW does not support
+        * using up the entire buffer to get tail == head meaning full.
+        */
+       if (tail < head)
+               used = (size - head) + tail;
+       else
+               used = tail - head;
+
+       /* make sure there is space, including an extra dword for the fence */
+       if (unlikely(used + len + 1 >= size))
+               return -ENOSPC;
+
+       /* Write the message. The format is the following:
+        * DW0: header (including action code)
+        * DW1: fence
+        * DW2+: action data
+        */
+       header = (len << GUC_CT_MSG_LEN_SHIFT) |
+                (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
+                (action[0] << GUC_CT_MSG_ACTION_SHIFT);
+
+       cmds[tail] = header;
+       tail = (tail + 1) % size;
+
+       cmds[tail] = fence;
+       tail = (tail + 1) % size;
+
+       for (i = 1; i < len; i++) {
+               cmds[tail] = action[i];
+               tail = (tail + 1) % size;
+       }
+
+       /* now update desc tail (back in bytes) */
+       desc->tail = tail * 4;
+       GEM_BUG_ON(desc->tail > desc->size);
+
+       return 0;
+}
+
+/* Wait for the response from the GuC.
+ * @fence:     response fence
+ * @status:    placeholder for status
+ * return:     0 response received (status is valid)
+ *             -ETIMEDOUT no response within hardcoded timeout
+ *             -EPROTO no response, ct buffer was in error
+ */
+static int wait_for_response(struct guc_ct_buffer_desc *desc,
+                            u32 fence,
+                            u32 *status)
+{
+       int err;
+
+       /*
+        * Fast commands should complete in less than 10us, so sample quickly
+        * up to that length of time, then switch to a slower sleep-wait loop.
+        * No GuC command should ever take longer than 10ms.
+        */
+#define done (READ_ONCE(desc->fence) == fence)
+       err = wait_for_us(done, 10);
+       if (err)
+               err = wait_for(done, 10);
+#undef done
+
+       if (unlikely(err)) {
+               DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
+                         fence, desc->fence);
+
+               if (WARN_ON(desc->is_in_error)) {
+                       /* Something went wrong with the messaging, try to reset
+                        * the buffer and hope for the best
+                        */
+                       guc_ct_buffer_desc_reset(desc);
+                       err = -EPROTO;
+               }
+       }
+
+       *status = desc->status;
+       return err;
+}
+
+static int ctch_send(struct intel_guc *guc,
+                    struct intel_guc_ct_channel *ctch,
+                    const u32 *action,
+                    u32 len,
+                    u32 *status)
+{
+       struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
+       struct guc_ct_buffer_desc *desc = ctb->desc;
+       u32 fence;
+       int err;
+
+       GEM_BUG_ON(!ctch_is_open(ctch));
+       GEM_BUG_ON(!len);
+       GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+
+       fence = ctch_get_next_fence(ctch);
+       err = ctb_write(ctb, action, len, fence);
+       if (unlikely(err))
+               return err;
+
+       intel_guc_notify(guc);
+
+       err = wait_for_response(desc, fence, status);
+       if (unlikely(err))
+               return err;
+       if (*status != INTEL_GUC_STATUS_SUCCESS)
+               return -EIO;
+       return 0;
+}
+
+/*
+ * Command Transport (CT) buffer based GuC send function.
+ */
+static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
+{
+       struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
+       u32 status = ~0; /* undefined */
+       int err;
+
+       mutex_lock(&guc->send_mutex);
+
+       err = ctch_send(guc, ctch, action, len, &status);
+       if (unlikely(err)) {
+               DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
+                         action[0], err, status);
+       }
+
+       mutex_unlock(&guc->send_mutex);
+       return err;
+}
+
+/**
+ * Enable buffer based command transport
+ * Shall only be called for platforms with HAS_GUC_CT.
+ * @guc:       the guc
+ * return:     0 on success
+ *             non-zero on failure
+ */
+int intel_guc_enable_ct(struct intel_guc *guc)
+{
+       struct drm_i915_private *dev_priv = guc_to_i915(guc);
+       struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
+       int err;
+
+       GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+
+       err = ctch_open(guc, ctch);
+       if (unlikely(err))
+               return err;
+
+       /* Switch into cmd transport buffer based send() */
+       guc->send = intel_guc_send_ct;
+       DRM_INFO("CT: %s\n", enableddisabled(true));
+       return 0;
+}
+
+/**
+ * Disable buffer based command transport.
+ * Shall only be called for platforms with HAS_GUC_CT.
+ * @guc: the guc
+ */
+void intel_guc_disable_ct(struct intel_guc *guc)
+{
+       struct drm_i915_private *dev_priv = guc_to_i915(guc);
+       struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
+
+       GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+
+       if (!ctch_is_open(ctch))
+               return;
+
+       ctch_close(guc, ctch);
+
+       /* Disable send */
+       guc->send = intel_guc_send_nop;
+       DRM_INFO("CT: %s\n", enableddisabled(false));
+}
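ctb_write() above treats the command buffer as a dword ring: free space is derived from head/tail with the usual wrap-around rule (the buffer is deliberately never filled completely), then the message is emitted as a header dword, a fence dword, and the remaining action payload. A self-contained model of that framing logic, using plain arrays and a simplified header instead of the shared GuC descriptor:

#include <stdio.h>
#include <stdint.h>

#define RING_DWORDS 16          /* toy size; the driver uses a quarter page per buffer */

/* Write header + fence + payload into a dword ring; -1 if it would not fit. */
static int ring_write(uint32_t *cmds, uint32_t *head, uint32_t *tail,
                      const uint32_t *action, uint32_t len, uint32_t fence)
{
        uint32_t used = (*tail >= *head) ? *tail - *head
                                         : (RING_DWORDS - *head) + *tail;

        if (used + len + 1 >= RING_DWORDS)      /* +1 for the fence dword */
                return -1;

        /* Simplified header: length in the low bits, action code in the high bits. */
        cmds[*tail] = (len & 0x1f) | (action[0] << 16);
        *tail = (*tail + 1) % RING_DWORDS;

        cmds[*tail] = fence;
        *tail = (*tail + 1) % RING_DWORDS;

        for (uint32_t i = 1; i < len; i++) {
                cmds[*tail] = action[i];
                *tail = (*tail + 1) % RING_DWORDS;
        }
        return 0;
}

int main(void)
{
        uint32_t cmds[RING_DWORDS] = { 0 };
        uint32_t head = 0, tail = 0, fence = 1;
        uint32_t action[] = { 0x505, 0xdeadbeef };      /* made-up action code + one data dword */

        if (!ring_write(cmds, &head, &tail, action, 2, fence++))
                printf("queued message, tail now at dword %u\n", tail);
        return 0;
}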
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
new file mode 100644 (file)
index 0000000..6d97f36
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright © 2016-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_GUC_CT_H_
+#define _INTEL_GUC_CT_H_
+
+struct intel_guc;
+struct i915_vma;
+
+#include "intel_guc_fwif.h"
+
+/**
+ * DOC: Command Transport (CT).
+ *
+ * Buffer based command transport is a replacement for MMIO based mechanism.
+ * It can be used to perform both host-2-guc and guc-to-host communication.
+ */
+
+/** Represents single command transport buffer.
+ *
+ * A single command transport buffer consists of two parts, the header
+ * record (command transport buffer descriptor) and the actual buffer which
+ * holds the commands.
+ *
+ * @desc: pointer to the buffer descriptor
+ * @cmds: pointer to the commands buffer
+ */
+struct intel_guc_ct_buffer {
+       struct guc_ct_buffer_desc *desc;
+       u32 *cmds;
+};
+
+/** Represents a pair of command transport buffers.
+ *
+ * Buffers go in pairs to allow bi-directional communication.
+ * To simplify the code we place both of them in the same vma.
+ * Buffers from the same pair must share a unique owner id.
+ *
+ * @vma: pointer to the vma with pair of CT buffers
+ * @ctbs: buffers for sending(0) and receiving(1) commands
+ * @owner: unique identifier
+ * @next_fence: fence to be used with next send command
+ */
+struct intel_guc_ct_channel {
+       struct i915_vma *vma;
+       struct intel_guc_ct_buffer ctbs[2];
+       u32 owner;
+       u32 next_fence;
+};
+
+/** Holds all command transport channels.
+ *
+ * @host_channel: main channel used by the host
+ */
+struct intel_guc_ct {
+       struct intel_guc_ct_channel host_channel;
+       /* other channels are tbd */
+};
+
+void intel_guc_ct_init_early(struct intel_guc_ct *ct);
+
+/* XXX: move to intel_uc.h? It doesn't fit there either. */
+int intel_guc_enable_ct(struct intel_guc *guc);
+void intel_guc_disable_ct(struct intel_guc *guc);
+
+#endif /* _INTEL_GUC_CT_H_ */
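As a reading aid (hypothetical helpers, not part of this header): given the documented layout, the send and receive buffers of the host channel are simply the two ctbs[] entries.

static inline struct intel_guc_ct_buffer *
ct_send_ctb(struct intel_guc_ct *ct)
{
	return &ct->host_channel.ctbs[0];	/* sending, index 0 */
}

static inline struct intel_guc_ct_buffer *
ct_recv_ctb(struct intel_guc_ct *ct)
{
	return &ct->host_channel.ctbs[1];	/* receiving, index 1 */
}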
index cb36cbf3818f3386aa0ca6b490a23e4b630dc5f0..5fa28607481179fd914e2e6a3900a4d43d45ff21 100644 (file)
@@ -23,8 +23,8 @@
 #ifndef _INTEL_GUC_FWIF_H
 #define _INTEL_GUC_FWIF_H
 
-#define GFXCORE_FAMILY_GEN9            12
-#define GFXCORE_FAMILY_UNKNOWN         0x7fffffff
+#define GUC_CORE_FAMILY_GEN9           12
+#define GUC_CORE_FAMILY_UNKNOWN                0x7fffffff
 
 #define GUC_CLIENT_PRIORITY_KMD_HIGH   0
 #define GUC_CLIENT_PRIORITY_HIGH       1
@@ -331,6 +331,47 @@ struct guc_stage_desc {
        u64 desc_private;
 } __packed;
 
+/*
+ * Describes a single command transport buffer.
+ * Used by both guc-master and clients.
+ */
+struct guc_ct_buffer_desc {
+       u32 addr;               /* gfx address */
+       u64 host_private;       /* host private data */
+       u32 size;               /* size in bytes */
+       u32 head;               /* offset updated by GuC */
+       u32 tail;               /* offset updated by owner */
+       u32 is_in_error;        /* error indicator */
+       u32 fence;              /* fence updated by GuC */
+       u32 status;             /* status updated by GuC */
+       u32 owner;              /* id of the channel owner */
+       u32 owner_sub_id;       /* owner-defined field for extra tracking */
+       u32 reserved[5];
+} __packed;
+
+/* Type of command transport buffer */
+#define INTEL_GUC_CT_BUFFER_TYPE_SEND  0x0u
+#define INTEL_GUC_CT_BUFFER_TYPE_RECV  0x1u
+
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0]   message len (in dwords)
+ * bit[7..5]   reserved
+ * bit[8]      write fence to desc
+ * bit[9]      write status to H2G buff
+ * bit[10]     send status (via G2H)
+ * bit[15..11] reserved
+ * bit[31..16] action code
+ */
+#define GUC_CT_MSG_LEN_SHIFT                   0
+#define GUC_CT_MSG_LEN_MASK                    0x1F
+#define GUC_CT_MSG_WRITE_FENCE_TO_DESC         (1 << 8)
+#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF                (1 << 9)
+#define GUC_CT_MSG_SEND_STATUS                 (1 << 10)
+#define GUC_CT_MSG_ACTION_SHIFT                        16
+#define GUC_CT_MSG_ACTION_MASK                 0xFFFF
+
 #define GUC_FORCEWAKE_RENDER   (1 << 0)
 #define GUC_FORCEWAKE_MEDIA    (1 << 1)
 
@@ -515,6 +556,8 @@ enum intel_guc_action {
        INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
        INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
        INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+       INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+       INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
        INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
        INTEL_GUC_ACTION_LIMIT
 };
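For reference (a sketch, not code from this patch): the DW0 header of a CT message can be composed from the GUC_CT_MSG_* fields added above; the helper name is hypothetical.

static inline u32 guc_ct_msg_dw0(u32 action, u32 len, u32 flags)
{
	/* len lands in bits 4..0, action in bits 31..16, flags are the
	 * GUC_CT_MSG_WRITE_FENCE_TO_DESC/WRITE_STATUS_TO_BUFF/SEND_STATUS bits */
	return ((action & GUC_CT_MSG_ACTION_MASK) << GUC_CT_MSG_ACTION_SHIFT) |
	       ((len & GUC_CT_MSG_LEN_MASK) << GUC_CT_MSG_LEN_SHIFT) |
	       flags;
}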
index 8a1a023e48b28c838a2b4910f3d9b656ce4d3081..d9045b6e897b4b3a2b77f6fad411a9a180f297d4 100644 (file)
@@ -61,6 +61,9 @@
 #define KBL_FW_MAJOR 9
 #define KBL_FW_MINOR 14
 
+#define GLK_FW_MAJOR 10
+#define GLK_FW_MINOR 56
+
 #define GUC_FW_PATH(platform, major, minor) \
        "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
 
@@ -73,6 +76,8 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
 #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
 MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
 
+#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR)
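Worked expansion (for reference, not an added line of the patch): with the major/minor values above, GUC_FW_PATH() resolves I915_GLK_GUC_UCODE to:

	"i915/glk_guc_ver10_56.bin"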
+
 
 static u32 get_gttype(struct drm_i915_private *dev_priv)
 {
@@ -86,11 +91,11 @@ static u32 get_core_family(struct drm_i915_private *dev_priv)
 
        switch (gen) {
        case 9:
-               return GFXCORE_FAMILY_GEN9;
+               return GUC_CORE_FAMILY_GEN9;
 
        default:
-               WARN(1, "GEN%d does not support GuC operation!\n", gen);
-               return GFXCORE_FAMILY_UNKNOWN;
+               MISSING_CASE(gen);
+               return GUC_CORE_FAMILY_UNKNOWN;
        }
 }
 
@@ -280,10 +285,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-       /* init WOPCM */
-       I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
-       I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE);
-
        /* Enable MIA caching. GuC clock gating is disabled. */
        I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE);
 
@@ -405,6 +406,10 @@ int intel_guc_select_fw(struct intel_guc *guc)
                guc->fw.path = I915_KBL_GUC_UCODE;
                guc->fw.major_ver_wanted = KBL_FW_MAJOR;
                guc->fw.minor_ver_wanted = KBL_FW_MINOR;
+       } else if (IS_GEMINILAKE(dev_priv)) {
+               guc->fw.path = I915_GLK_GUC_UCODE;
+               guc->fw.major_ver_wanted = GLK_FW_MAJOR;
+               guc->fw.minor_ver_wanted = GLK_FW_MINOR;
        } else {
                DRM_ERROR("No GuC firmware known for platform with GuC!\n");
                return -ENOENT;
index 6fb63a3c65b030e11de3c9770574da9fa82dbf9c..16d3b8719cab437758243639be4554cd60eb28de 100644 (file)
@@ -359,12 +359,16 @@ static int guc_log_runtime_create(struct intel_guc *guc)
        void *vaddr;
        struct rchan *guc_log_relay_chan;
        size_t n_subbufs, subbuf_size;
-       int ret = 0;
+       int ret;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
        GEM_BUG_ON(guc_log_has_runtime(guc));
 
+       ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
+       if (ret)
+               return ret;
+
        /* Create a WC (Uncached for read) vmalloc mapping of log
         * buffer pages, so that we can directly get the data
         * (up-to-date) from memory.
index dce742243ba665b31f34268f99d779833ee09d3b..9b0ece427bdc8aeeb2eb28745c81bd18066f90c4 100644 (file)
@@ -407,7 +407,7 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
                                 "%s, ", engine->name);
        msg[len-2] = '\0';
 
-       return i915_handle_error(i915, hung, msg);
+       return i915_handle_error(i915, hung, "%s", msg);
 }
 
 /*
index c6b8207724fadf64d7c58e7ed8ec8fb3c8284c70..58d690393b294b3f2b71ef6a004b14819dfe5b91 100644 (file)
@@ -1327,6 +1327,11 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state)
                        return false;
        }
 
+       /* Display Wa #1139 */
+       if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) &&
+           crtc_state->base.adjusted_mode.htotal > 5460)
+               return false;
+
        return true;
 }
 
@@ -1392,7 +1397,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
        }
 
        if (!pipe_config->bw_constrained) {
-               DRM_DEBUG_KMS("forcing pipe bpc to %i for HDMI\n", desired_bpp);
+               DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp);
                pipe_config->pipe_bpp = desired_bpp;
        }
 
index 9ee819666a4c9c51745cdbc074d0b0d1896a1bbe..f5eb18d0e2d139e8a0609c59363344c11d0b280a 100644 (file)
 #define KBL_HUC_FW_MINOR 00
 #define KBL_BLD_NUM 1810
 
+#define GLK_HUC_FW_MAJOR 02
+#define GLK_HUC_FW_MINOR 00
+#define GLK_BLD_NUM 1748
+
 #define HUC_FW_PATH(platform, major, minor, bld_num) \
        "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
        __stringify(minor) "_" __stringify(bld_num) ".bin"
@@ -68,6 +72,9 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE);
        KBL_HUC_FW_MINOR, KBL_BLD_NUM)
 MODULE_FIRMWARE(I915_KBL_HUC_UCODE);
 
+#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \
+       GLK_HUC_FW_MINOR, GLK_BLD_NUM)
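Worked expansion (for reference, not an added line of the patch): with the values above, HUC_FW_PATH() resolves I915_GLK_HUC_UCODE to:

	"i915/glk_huc_ver02_00_1748.bin"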
+
 /**
  * huc_ucode_xfer() - DMA's the firmware
  * @dev_priv: the drm_i915_private device
@@ -99,11 +106,6 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv)
 
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-       /* init WOPCM */
-       I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
-       I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE |
-                       HUC_LOADING_AGENT_GUC);
-
        /* Set the source address for the uCode */
        offset = guc_ggtt_offset(vma) + huc_fw->header_offset;
        I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
@@ -169,6 +171,10 @@ void intel_huc_select_fw(struct intel_huc *huc)
                huc->fw.path = I915_KBL_HUC_UCODE;
                huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR;
                huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR;
+       } else if (IS_GEMINILAKE(dev_priv)) {
+               huc->fw.path = I915_GLK_HUC_UCODE;
+               huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR;
+               huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR;
        } else {
                DRM_ERROR("No HuC firmware known for platform with HuC!\n");
                return;
@@ -186,68 +192,36 @@ void intel_huc_select_fw(struct intel_huc *huc)
  * earlier call to intel_huc_init(), so here we need only check that
  * it succeeded, and then transfer the image to the h/w.
  *
- * Return:     non-zero code on error
  */
-int intel_huc_init_hw(struct intel_huc *huc)
+void intel_huc_init_hw(struct intel_huc *huc)
 {
        struct drm_i915_private *dev_priv = huc_to_i915(huc);
        int err;
 
-       if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_NONE)
-               return 0;
-
        DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n",
                huc->fw.path,
                intel_uc_fw_status_repr(huc->fw.fetch_status),
                intel_uc_fw_status_repr(huc->fw.load_status));
 
-       if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_SUCCESS &&
-           huc->fw.load_status == INTEL_UC_FIRMWARE_FAIL)
-               return -ENOEXEC;
+       if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
+               return;
 
        huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING;
 
-       switch (huc->fw.fetch_status) {
-       case INTEL_UC_FIRMWARE_FAIL:
-               /* something went wrong :( */
-               err = -EIO;
-               goto fail;
-
-       case INTEL_UC_FIRMWARE_NONE:
-       case INTEL_UC_FIRMWARE_PENDING:
-       default:
-               /* "can't happen" */
-               WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n",
-                       huc->fw.path,
-                       intel_uc_fw_status_repr(huc->fw.fetch_status),
-                       huc->fw.fetch_status);
-               err = -ENXIO;
-               goto fail;
-
-       case INTEL_UC_FIRMWARE_SUCCESS:
-               break;
-       }
-
        err = huc_ucode_xfer(dev_priv);
-       if (err)
-               goto fail;
 
-       huc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS;
+       huc->fw.load_status = err ?
+               INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS;
 
        DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n",
                huc->fw.path,
                intel_uc_fw_status_repr(huc->fw.fetch_status),
                intel_uc_fw_status_repr(huc->fw.load_status));
 
-       return 0;
-
-fail:
-       if (huc->fw.load_status == INTEL_UC_FIRMWARE_PENDING)
-               huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
-
-       DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err);
+       if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
+               DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err);
 
-       return err;
+       return;
 }
 
 /**
index 25d8e76489e40ff989fd616386f03b36f9ba03fa..3bf65288ffffd51719d0c4e8ce934ccd2d2f59e3 100644 (file)
@@ -63,6 +63,7 @@
 #include <linux/acpi.h>
 #include <linux/device.h>
 #include <linux/pci.h>
+#include <linux/pm_runtime.h>
 
 #include "i915_drv.h"
 #include <linux/delay.h>
@@ -110,6 +111,11 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
        pinfo.size_data = sizeof(*pdata);
        pinfo.dma_mask = DMA_BIT_MASK(32);
 
+       pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes;
+       pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 3 : 2; /* B,C,D or B,C */
+       pdata->port[0].pipe = -1;
+       pdata->port[1].pipe = -1;
+       pdata->port[2].pipe = -1;
        spin_lock_init(&pdata->lpe_audio_slock);
 
        platdev = platform_device_register_full(&pinfo);
@@ -121,6 +127,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
 
        kfree(rsc);
 
+       pm_runtime_forbid(&platdev->dev);
+       pm_runtime_set_active(&platdev->dev);
+       pm_runtime_enable(&platdev->dev);
+
        return platdev;
 
 err:
@@ -144,44 +154,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
 
 static void lpe_audio_irq_unmask(struct irq_data *d)
 {
-       struct drm_i915_private *dev_priv = d->chip_data;
-       unsigned long irqflags;
-       u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-               I915_LPE_PIPE_B_INTERRUPT);
-
-       if (IS_CHERRYVIEW(dev_priv))
-               val |= I915_LPE_PIPE_C_INTERRUPT;
-
-       spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-       dev_priv->irq_mask &= ~val;
-       I915_WRITE(VLV_IIR, val);
-       I915_WRITE(VLV_IIR, val);
-       I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-       POSTING_READ(VLV_IMR);
-
-       spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static void lpe_audio_irq_mask(struct irq_data *d)
 {
-       struct drm_i915_private *dev_priv = d->chip_data;
-       unsigned long irqflags;
-       u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-               I915_LPE_PIPE_B_INTERRUPT);
-
-       if (IS_CHERRYVIEW(dev_priv))
-               val |= I915_LPE_PIPE_C_INTERRUPT;
-
-       spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-       dev_priv->irq_mask |= val;
-       I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-       I915_WRITE(VLV_IIR, val);
-       I915_WRITE(VLV_IIR, val);
-       POSTING_READ(VLV_IIR);
-
-       spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static struct irq_chip lpe_audio_irqchip = {
@@ -325,8 +301,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
 
        desc = irq_to_desc(dev_priv->lpe_audio.irq);
 
-       lpe_audio_irq_mask(&desc->irq_data);
-
        lpe_audio_platdev_destroy(dev_priv);
 
        irq_free_desc(dev_priv->lpe_audio.irq);
@@ -337,53 +311,47 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
  * intel_lpe_audio_notify() - notify lpe audio event
  * audio driver and i915
  * @dev_priv: the i915 drm device private data
+ * @pipe: pipe
+ * @port: port
  * @eld : ELD data
- * @pipe: pipe id
- * @port: port id
- * @tmds_clk_speed: tmds clock frequency in Hz
+ * @ls_clock: Link symbol clock in kHz
+ * @dp_output: Driving a DP output?
  *
  * Notify lpe audio driver of eld change.
  */
 void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
-                           void *eld, int port, int pipe, int tmds_clk_speed,
-                           bool dp_output, int link_rate)
+                           enum pipe pipe, enum port port,
+                           const void *eld, int ls_clock, bool dp_output)
 {
-       unsigned long irq_flags;
-       struct intel_hdmi_lpe_audio_pdata *pdata = NULL;
+       unsigned long irqflags;
+       struct intel_hdmi_lpe_audio_pdata *pdata;
+       struct intel_hdmi_lpe_audio_port_pdata *ppdata;
        u32 audio_enable;
 
        if (!HAS_LPE_AUDIO(dev_priv))
                return;
 
-       pdata = dev_get_platdata(
-               &(dev_priv->lpe_audio.platdev->dev));
+       pdata = dev_get_platdata(&dev_priv->lpe_audio.platdev->dev);
+       ppdata = &pdata->port[port - PORT_B];
 
-       spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags);
+       spin_lock_irqsave(&pdata->lpe_audio_slock, irqflags);
 
        audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port));
 
        if (eld != NULL) {
-               memcpy(pdata->eld.eld_data, eld,
-                       HDMI_MAX_ELD_BYTES);
-               pdata->eld.port_id = port;
-               pdata->eld.pipe_id = pipe;
-               pdata->hdmi_connected = true;
-
-               pdata->dp_output = dp_output;
-               if (tmds_clk_speed)
-                       pdata->tmds_clock_speed = tmds_clk_speed;
-               if (link_rate)
-                       pdata->link_rate = link_rate;
+               memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES);
+               ppdata->pipe = pipe;
+               ppdata->ls_clock = ls_clock;
+               ppdata->dp_output = dp_output;
 
                /* Unmute the amp for both DP and HDMI */
                I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
                           audio_enable & ~VLV_AMP_MUTE);
-
        } else {
-               memset(pdata->eld.eld_data, 0,
-                       HDMI_MAX_ELD_BYTES);
-               pdata->hdmi_connected = false;
-               pdata->dp_output = false;
+               memset(ppdata->eld, 0, HDMI_MAX_ELD_BYTES);
+               ppdata->pipe = -1;
+               ppdata->ls_clock = 0;
+               ppdata->dp_output = false;
 
                /* Mute the amp for both DP and HDMI */
                I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
@@ -391,10 +359,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
        }
 
        if (pdata->notify_audio_lpe)
-               pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev);
-       else
-               pdata->notify_pending = true;
+               pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev, port - PORT_B);
 
-       spin_unlock_irqrestore(&pdata->lpe_audio_slock,
-                       irq_flags);
+       spin_unlock_irqrestore(&pdata->lpe_audio_slock, irqflags);
 }
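A worked example of the per-port indexing used above (illustration only, not part of the patch):

	/* port == PORT_C  =>  ppdata = &pdata->port[PORT_C - PORT_B]
	 *                            = &pdata->port[1]               */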
index c8f7c631fc1f8e354cac0038c80aa35d0a1dd0d2..014b30ace8a0af394960d4a8ed259dc712758b52 100644 (file)
 #include "i915_drv.h"
 #include "intel_mocs.h"
 
-#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
-
 #define RING_EXECLIST_QFULL            (1 << 0x2)
 #define RING_EXECLIST1_VALID           (1 << 0x3)
 #define RING_EXECLIST0_VALID           (1 << 0x4)
@@ -326,8 +322,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
                rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
        u32 *reg_state = ce->lrc_reg_state;
 
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       reg_state[CTX_RING_TAIL+1] = rq->tail;
+       reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
        /* True 32b PPGTT with dynamic page allocation: update PDP
         * registers and point the unallocated PDPs to scratch page.
@@ -342,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-       struct drm_i915_private *dev_priv = engine->i915;
        struct execlist_port *port = engine->execlist_port;
        u32 __iomem *elsp =
-               dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-       u64 desc[2];
-
-       GEM_BUG_ON(port[0].count > 1);
-       if (!port[0].count)
-               execlists_context_status_change(port[0].request,
-                                               INTEL_CONTEXT_SCHEDULE_IN);
-       desc[0] = execlists_update_context(port[0].request);
-       GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
-       port[0].count++;
-
-       if (port[1].request) {
-               GEM_BUG_ON(port[1].count);
-               execlists_context_status_change(port[1].request,
-                                               INTEL_CONTEXT_SCHEDULE_IN);
-               desc[1] = execlists_update_context(port[1].request);
-               GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
-               port[1].count = 1;
-       } else {
-               desc[1] = 0;
-       }
-       GEM_BUG_ON(desc[0] == desc[1]);
-
-       /* You must always write both descriptors in the order below. */
-       writel(upper_32_bits(desc[1]), elsp);
-       writel(lower_32_bits(desc[1]), elsp);
+               engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+       unsigned int n;
+
+       for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+               struct drm_i915_gem_request *rq;
+               unsigned int count;
+               u64 desc;
+
+               rq = port_unpack(&port[n], &count);
+               if (rq) {
+                       GEM_BUG_ON(count > !n);
+                       if (!count++)
+                               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+                       port_set(&port[n], port_pack(rq, count));
+                       desc = execlists_update_context(rq);
+                       GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+               } else {
+                       GEM_BUG_ON(!n);
+                       desc = 0;
+               }
 
-       writel(upper_32_bits(desc[0]), elsp);
-       /* The context is automatically loaded after the following */
-       writel(lower_32_bits(desc[0]), elsp);
+               writel(upper_32_bits(desc), elsp);
+               writel(lower_32_bits(desc), elsp);
+       }
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -395,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
        return true;
 }
 
+static void port_assign(struct execlist_port *port,
+                       struct drm_i915_gem_request *rq)
+{
+       GEM_BUG_ON(rq == port_request(port));
+
+       if (port_isset(port))
+               i915_gem_request_put(port_request(port));
+
+       port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}
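The port_pack()/port_unpack()/port_count() helpers used here are introduced elsewhere in this series. Conceptually (an assumption based on their use in this file, not code from the patch) they stash a small in-flight count in the low, alignment-guaranteed bits of the request pointer:

/* Sketch only: pack a request pointer together with a 2-bit count. */
static inline unsigned long sketch_port_pack(struct drm_i915_gem_request *rq,
					     unsigned int count)
{
	return (unsigned long)rq | (count & 0x3);
}

static inline struct drm_i915_gem_request *
sketch_port_unpack(unsigned long packed, unsigned int *count)
{
	*count = packed & 0x3;
	return (struct drm_i915_gem_request *)(packed & ~0x3ul);
}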
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_request *last;
@@ -402,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
        struct rb_node *rb;
        bool submit = false;
 
-       last = port->request;
+       last = port_request(port);
        if (last)
                /* WaIdleLiteRestore:bdw,skl
                 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -412,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 */
                last->tail = last->wa_tail;
 
-       GEM_BUG_ON(port[1].request);
+       GEM_BUG_ON(port_isset(&port[1]));
 
        /* Hardware submission is through 2 ports. Conceptually each port
         * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -437,72 +436,86 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
        spin_lock_irq(&engine->timeline->lock);
        rb = engine->execlist_first;
+       GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
        while (rb) {
-               struct drm_i915_gem_request *cursor =
-                       rb_entry(rb, typeof(*cursor), priotree.node);
-
-               /* Can we combine this request with the current port? It has to
-                * be the same context/ringbuffer and not have any exceptions
-                * (e.g. GVT saying never to combine contexts).
-                *
-                * If we can combine the requests, we can execute both by
-                * updating the RING_TAIL to point to the end of the second
-                * request, and so we never need to tell the hardware about
-                * the first.
-                */
-               if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
-                       /* If we are on the second port and cannot combine
-                        * this request with the last, then we are done.
-                        */
-                       if (port != engine->execlist_port)
-                               break;
-
-                       /* If GVT overrides us we only ever submit port[0],
-                        * leaving port[1] empty. Note that we also have
-                        * to be careful that we don't queue the same
-                        * context (even though a different request) to
-                        * the second port.
+               struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+               struct drm_i915_gem_request *rq, *rn;
+
+               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+                       /*
+                        * Can we combine this request with the current port?
+                        * It has to be the same context/ringbuffer and not
+                        * have any exceptions (e.g. GVT saying never to
+                        * combine contexts).
+                        *
+                        * If we can combine the requests, we can execute both
+                        * by updating the RING_TAIL to point to the end of the
+                        * second request, and so we never need to tell the
+                        * hardware about the first.
                         */
-                       if (ctx_single_port_submission(last->ctx) ||
-                           ctx_single_port_submission(cursor->ctx))
-                               break;
+                       if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+                               /*
+                                * If we are on the second port and cannot
+                                * combine this request with the last, then we
+                                * are done.
+                                */
+                               if (port != engine->execlist_port) {
+                                       __list_del_many(&p->requests,
+                                                       &rq->priotree.link);
+                                       goto done;
+                               }
+
+                               /*
+                                * If GVT overrides us we only ever submit
+                                * port[0], leaving port[1] empty. Note that we
+                                * also have to be careful that we don't queue
+                                * the same context (even though a different
+                                * request) to the second port.
+                                */
+                               if (ctx_single_port_submission(last->ctx) ||
+                                   ctx_single_port_submission(rq->ctx)) {
+                                       __list_del_many(&p->requests,
+                                                       &rq->priotree.link);
+                                       goto done;
+                               }
+
+                               GEM_BUG_ON(last->ctx == rq->ctx);
+
+                               if (submit)
+                                       port_assign(port, last);
+                               port++;
+                       }
 
-                       GEM_BUG_ON(last->ctx == cursor->ctx);
+                       INIT_LIST_HEAD(&rq->priotree.link);
+                       rq->priotree.priority = INT_MAX;
 
-                       i915_gem_request_assign(&port->request, last);
-                       port++;
+                       __i915_gem_request_submit(rq);
+                       trace_i915_gem_request_in(rq, port_index(port, engine));
+                       last = rq;
+                       submit = true;
                }
 
                rb = rb_next(rb);
-               rb_erase(&cursor->priotree.node, &engine->execlist_queue);
-               RB_CLEAR_NODE(&cursor->priotree.node);
-               cursor->priotree.priority = INT_MAX;
-
-               __i915_gem_request_submit(cursor);
-               trace_i915_gem_request_in(cursor, port - engine->execlist_port);
-               last = cursor;
-               submit = true;
-       }
-       if (submit) {
-               i915_gem_request_assign(&port->request, last);
-               engine->execlist_first = rb;
+               rb_erase(&p->node, &engine->execlist_queue);
+               INIT_LIST_HEAD(&p->requests);
+               if (p->priority != I915_PRIORITY_NORMAL)
+                       kmem_cache_free(engine->i915->priorities, p);
        }
+done:
+       engine->execlist_first = rb;
+       if (submit)
+               port_assign(port, last);
        spin_unlock_irq(&engine->timeline->lock);
 
        if (submit)
                execlists_submit_ports(engine);
 }
 
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-       return !engine->execlist_port[0].request;
-}
-
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
        const struct execlist_port *port = engine->execlist_port;
 
-       return port[0].count + port[1].count < 2;
+       return port_count(&port[0]) + port_count(&port[1]) < 2;
 }
 
 /*
@@ -515,6 +528,15 @@ static void intel_lrc_irq_handler(unsigned long data)
        struct execlist_port *port = engine->execlist_port;
        struct drm_i915_private *dev_priv = engine->i915;
 
+       /* We can skip acquiring intel_runtime_pm_get() here as it was taken
+        * on our behalf by the request (see i915_gem_mark_busy()) and it will
+        * not be relinquished until the device is idle (see
+        * i915_gem_idle_work_handler()). As a precaution, we make sure
+        * that all ELSP are drained i.e. we have processed the CSB,
+        * before allowing ourselves to idle and calling intel_runtime_pm_put().
+        */
+       GEM_BUG_ON(!dev_priv->gt.awake);
+
        intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
        /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
@@ -543,7 +565,9 @@ static void intel_lrc_irq_handler(unsigned long data)
                tail = GEN8_CSB_WRITE_PTR(head);
                head = GEN8_CSB_READ_PTR(head);
                while (head != tail) {
+                       struct drm_i915_gem_request *rq;
                        unsigned int status;
+                       unsigned int count;
 
                        if (++head == GEN8_CSB_ENTRIES)
                                head = 0;
@@ -571,22 +595,26 @@ static void intel_lrc_irq_handler(unsigned long data)
 
                        /* Check the context/desc id for this event matches */
                        GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-                                        port[0].context_id);
+                                        port->context_id);
 
-                       GEM_BUG_ON(port[0].count == 0);
-                       if (--port[0].count == 0) {
+                       rq = port_unpack(port, &count);
+                       GEM_BUG_ON(count == 0);
+                       if (--count == 0) {
                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                               GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-                               execlists_context_status_change(port[0].request,
-                                                               INTEL_CONTEXT_SCHEDULE_OUT);
+                               GEM_BUG_ON(!i915_gem_request_completed(rq));
+                               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+                               trace_i915_gem_request_out(rq);
+                               i915_gem_request_put(rq);
 
-                               trace_i915_gem_request_out(port[0].request);
-                               i915_gem_request_put(port[0].request);
                                port[0] = port[1];
                                memset(&port[1], 0, sizeof(port[1]));
+                       } else {
+                               port_set(port, port_pack(rq, count));
                        }
 
-                       GEM_BUG_ON(port[0].count == 0 &&
+                       /* After the final element, the hw should be idle */
+                       GEM_BUG_ON(port_count(port) == 0 &&
                                   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
                }
 
@@ -600,28 +628,66 @@ static void intel_lrc_irq_handler(unsigned long data)
        intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+static bool
+insert_request(struct intel_engine_cs *engine,
+              struct i915_priotree *pt,
+              int prio)
 {
-       struct rb_node **p, *rb;
+       struct i915_priolist *p;
+       struct rb_node **parent, *rb;
        bool first = true;
 
+       if (unlikely(engine->no_priolist))
+               prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
        /* most positive priority is scheduled first, equal priorities fifo */
        rb = NULL;
-       p = &root->rb_node;
-       while (*p) {
-               struct i915_priotree *pos;
-
-               rb = *p;
-               pos = rb_entry(rb, typeof(*pos), node);
-               if (pt->priority > pos->priority) {
-                       p = &rb->rb_left;
-               } else {
-                       p = &rb->rb_right;
+       parent = &engine->execlist_queue.rb_node;
+       while (*parent) {
+               rb = *parent;
+               p = rb_entry(rb, typeof(*p), node);
+               if (prio > p->priority) {
+                       parent = &rb->rb_left;
+               } else if (prio < p->priority) {
+                       parent = &rb->rb_right;
                        first = false;
+               } else {
+                       list_add_tail(&pt->link, &p->requests);
+                       return false;
                }
        }
-       rb_link_node(&pt->node, rb, p);
-       rb_insert_color(&pt->node, root);
+
+       if (prio == I915_PRIORITY_NORMAL) {
+               p = &engine->default_priolist;
+       } else {
+               p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
+               /* Convert an allocation failure to a priority bump */
+               if (unlikely(!p)) {
+                       prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+                       /* To maintain ordering with all rendering, after an
+                        * allocation failure we have to disable all scheduling.
+                        * Requests will then be executed in fifo, and schedule
+                        * will ensure that dependencies are emitted in fifo.
+                        * There will still be some reordering with existing
+                        * requests, so if userspace lied about their
+                        * dependencies that reordering may be visible.
+                        */
+                       engine->no_priolist = true;
+                       goto find_priolist;
+               }
+       }
+
+       p->priority = prio;
+       rb_link_node(&p->node, rb, parent);
+       rb_insert_color(&p->node, &engine->execlist_queue);
+
+       INIT_LIST_HEAD(&p->requests);
+       list_add_tail(&pt->link, &p->requests);
+
+       if (first)
+               engine->execlist_first = &p->node;
 
        return first;
 }
@@ -634,12 +700,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
        /* Will be called from irq-context when using foreign fences. */
        spin_lock_irqsave(&engine->timeline->lock, flags);
 
-       if (insert_request(&request->priotree, &engine->execlist_queue)) {
-               engine->execlist_first = &request->priotree.node;
+       if (insert_request(engine,
+                          &request->priotree,
+                          request->priotree.priority)) {
                if (execlists_elsp_ready(engine))
                        tasklet_hi_schedule(&engine->irq_tasklet);
        }
 
+       GEM_BUG_ON(!engine->execlist_first);
+       GEM_BUG_ON(list_empty(&request->priotree.link));
+
        spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
@@ -709,6 +779,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
                list_safe_reset_next(dep, p, dfs_link);
        }
 
+       /* If we didn't need to bump any existing priorities, and we haven't
+        * yet submitted this request (i.e. there is no potential race with
+        * execlists_submit_request()), we can set our own priority and skip
+        * acquiring the engine locks.
+        */
+       if (request->priotree.priority == INT_MIN) {
+               GEM_BUG_ON(!list_empty(&request->priotree.link));
+               request->priotree.priority = prio;
+               if (stack.dfs_link.next == stack.dfs_link.prev)
+                       return;
+               __list_del_entry(&stack.dfs_link);
+       }
+
        engine = request->engine;
        spin_lock_irq(&engine->timeline->lock);
 
@@ -724,10 +807,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
                        continue;
 
                pt->priority = prio;
-               if (!RB_EMPTY_NODE(&pt->node)) {
-                       rb_erase(&pt->node, &engine->execlist_queue);
-                       if (insert_request(pt, &engine->execlist_queue))
-                               engine->execlist_first = &pt->node;
+               if (!list_empty(&pt->link)) {
+                       __list_del_entry(&pt->link);
+                       insert_request(engine, pt, prio);
                }
        }
 
@@ -736,8 +818,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
        /* XXX Do we need to preempt to make room for us and our deps? */
 }
 
-static int execlists_context_pin(struct intel_engine_cs *engine,
-                                struct i915_gem_context *ctx)
+static struct intel_ring *
+execlists_context_pin(struct intel_engine_cs *engine,
+                     struct i915_gem_context *ctx)
 {
        struct intel_context *ce = &ctx->engine[engine->id];
        unsigned int flags;
@@ -746,8 +829,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
 
        lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 
-       if (ce->pin_count++)
-               return 0;
+       if (likely(ce->pin_count++))
+               goto out;
        GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
        if (!ce->state) {
@@ -771,7 +854,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
                goto unpin_vma;
        }
 
-       ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias);
+       ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias);
        if (ret)
                goto unpin_map;
 
@@ -784,7 +867,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
        ce->state->obj->mm.dirty = true;
 
        i915_gem_context_get(ctx);
-       return 0;
+out:
+       return ce->ring;
 
 unpin_map:
        i915_gem_object_unpin_map(ce->state->obj);
@@ -792,7 +876,7 @@ unpin_vma:
        __i915_vma_unpin(ce->state);
 err:
        ce->pin_count = 0;
-       return ret;
+       return ERR_PTR(ret);
 }
 
 static void execlists_context_unpin(struct intel_engine_cs *engine,
@@ -829,9 +913,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
         */
        request->reserved_space += EXECLISTS_REQUEST_SIZE;
 
-       GEM_BUG_ON(!ce->ring);
-       request->ring = ce->ring;
-
        if (i915.enable_guc_submission) {
                /*
                 * Check that the GuC has space for the request before
@@ -1139,14 +1220,12 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
        return ret;
 }
 
-static u32 port_seqno(struct execlist_port *port)
-{
-       return port->request ? port->request->global_seqno : 0;
-}
-
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
+       struct execlist_port *port = engine->execlist_port;
+       unsigned int n;
+       bool submit;
        int ret;
 
        ret = intel_mocs_init_engine(engine);
@@ -1167,16 +1246,24 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
        /* After a GPU reset, we may have requests to replay */
        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-       if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
-               DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n",
-                                engine->name,
-                                port_seqno(&engine->execlist_port[0]),
-                                port_seqno(&engine->execlist_port[1]));
-               engine->execlist_port[0].count = 0;
-               engine->execlist_port[1].count = 0;
-               execlists_submit_ports(engine);
+
+       submit = false;
+       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+               if (!port_isset(&port[n]))
+                       break;
+
+               DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
+                                engine->name, n,
+                                port_request(&port[n])->global_seqno);
+
+               /* Discard the current inflight count */
+               port_set(&port[n], port_request(&port[n]));
+               submit = true;
        }
 
+       if (submit && !i915.enable_guc_submission)
+               execlists_submit_ports(engine);
+
        return 0;
 }
 
@@ -1252,13 +1339,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
        intel_ring_update_space(request->ring);
 
        /* Catch up with any missed context-switch interrupts */
-       if (request->ctx != port[0].request->ctx) {
-               i915_gem_request_put(port[0].request);
+       if (request->ctx != port_request(port)->ctx) {
+               i915_gem_request_put(port_request(port));
                port[0] = port[1];
                memset(&port[1], 0, sizeof(port[1]));
        }
 
-       GEM_BUG_ON(request->ctx != port[0].request->ctx);
+       GEM_BUG_ON(request->ctx != port_request(port)->ctx);
 
        /* Reset WaIdleLiteRestore:bdw,skl as well */
        request->tail =
@@ -1907,44 +1994,6 @@ populate_lr_context(struct i915_gem_context *ctx,
        return 0;
 }
 
-/**
- * intel_lr_context_size() - return the size of the context for an engine
- * @engine: which engine to find the context size for
- *
- * Each engine may require a different amount of space for a context image,
- * so when allocating (or copying) an image, this function can be used to
- * find the right size for the specific engine.
- *
- * Return: size (in bytes) of an engine-specific context image
- *
- * Note: this size includes the HWSP, which is part of the context image
- * in LRC mode, but does not include the "shared data page" used with
- * GuC submission. The caller should account for this if using the GuC.
- */
-uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
-{
-       int ret = 0;
-
-       WARN_ON(INTEL_GEN(engine->i915) < 8);
-
-       switch (engine->id) {
-       case RCS:
-               if (INTEL_GEN(engine->i915) >= 9)
-                       ret = GEN9_LR_CONTEXT_RENDER_SIZE;
-               else
-                       ret = GEN8_LR_CONTEXT_RENDER_SIZE;
-               break;
-       case VCS:
-       case BCS:
-       case VECS:
-       case VCS2:
-               ret = GEN8_LR_CONTEXT_OTHER_SIZE;
-               break;
-       }
-
-       return ret;
-}
-
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
                                            struct intel_engine_cs *engine)
 {
@@ -1957,8 +2006,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 
        WARN_ON(ce->state);
 
-       context_size = round_up(intel_lr_context_size(engine),
-                               I915_GTT_PAGE_SIZE);
+       context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
 
        /* One extra page as the sharing data between driver and GuC */
        context_size += PAGE_SIZE * LRC_PPHWSP_PN;
@@ -1989,7 +2037,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 
        ce->ring = ring;
        ce->state = vma;
-       ce->initialised = engine->init_context == NULL;
+       ce->initialised |= engine->init_context == NULL;
 
        return 0;
 
@@ -2036,8 +2084,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
                        ce->state->obj->mm.dirty = true;
                        i915_gem_object_unpin_map(ce->state->obj);
 
-                       ce->ring->head = ce->ring->tail = 0;
-                       intel_ring_update_space(ce->ring);
+                       intel_ring_reset(ce->ring, 0);
                }
        }
 }
index e8015e7bf4e902ed02f6cc2c52e5b35d2d4bc0d6..52b3a1fd4059bab91a898f7f498a79af2c117a4b 100644 (file)
@@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
 struct drm_i915_private;
 struct i915_gem_context;
 
-uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
-
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
                                     struct intel_engine_cs *engine);
index cb50c527401fe123b8ad76f7f1f831b8ccdd920a..c8103f8d4dfa7723e5b4093c17b656b7853a2453 100644 (file)
@@ -888,10 +888,14 @@ static void pch_enable_backlight(struct intel_connector *connector)
        struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
        struct intel_panel *panel = &connector->panel;
        enum pipe pipe = intel_get_pipe_from_connector(connector);
-       enum transcoder cpu_transcoder =
-               intel_pipe_to_cpu_transcoder(dev_priv, pipe);
+       enum transcoder cpu_transcoder;
        u32 cpu_ctl2, pch_ctl1, pch_ctl2;
 
+       if (!WARN_ON_ONCE(pipe == INVALID_PIPE))
+               cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe);
+       else
+               cpu_transcoder = TRANSCODER_EDP;
+
        cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2);
        if (cpu_ctl2 & BLM_PWM_ENABLE) {
                DRM_DEBUG_KMS("cpu backlight already enabled\n");
@@ -973,6 +977,9 @@ static void i965_enable_backlight(struct intel_connector *connector)
        enum pipe pipe = intel_get_pipe_from_connector(connector);
        u32 ctl, ctl2, freq;
 
+       if (WARN_ON_ONCE(pipe == INVALID_PIPE))
+               pipe = PIPE_A;
+
        ctl2 = I915_READ(BLC_PWM_CTL2);
        if (ctl2 & BLM_PWM_ENABLE) {
                DRM_DEBUG_KMS("backlight already enabled\n");
@@ -1037,6 +1044,9 @@ static void bxt_enable_backlight(struct intel_connector *connector)
        enum pipe pipe = intel_get_pipe_from_connector(connector);
        u32 pwm_ctl, val;
 
+       if (WARN_ON_ONCE(pipe == INVALID_PIPE))
+               pipe = PIPE_A;
+
        /* Controller 1 uses the utility pin. */
        if (panel->backlight.controller == 1) {
                val = I915_READ(UTIL_PIN_CTL);
@@ -1093,7 +1103,8 @@ void intel_panel_enable_backlight(struct intel_connector *connector)
        if (!panel->backlight.present)
                return;
 
-       DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe));
+       if (!WARN_ON_ONCE(pipe == INVALID_PIPE))
+               DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe));
 
        mutex_lock(&dev_priv->backlight_lock);
 
index 206ee4f0150e7fc339018611b50dbcda7b2205a7..8fbd2bd0877fbc650fa6f604beba2cbd8e18a48c 100644 (file)
@@ -513,16 +513,20 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
        struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
        struct intel_crtc_state *pipe_config;
        struct drm_atomic_state *state;
+       struct drm_modeset_acquire_ctx ctx;
        int ret = 0;
 
-       drm_modeset_lock_all(dev);
+       drm_modeset_acquire_init(&ctx, 0);
+
        state = drm_atomic_state_alloc(dev);
        if (!state) {
                ret = -ENOMEM;
                goto unlock;
        }
 
-       state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx;
+       state->acquire_ctx = &ctx;
+
+retry:
        pipe_config = intel_atomic_get_crtc_state(state, crtc);
        if (IS_ERR(pipe_config)) {
                ret = PTR_ERR(pipe_config);
@@ -537,10 +541,17 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
        ret = drm_atomic_commit(state);
 
 put_state:
+       if (ret == -EDEADLK) {
+               drm_atomic_state_clear(state);
+               drm_modeset_backoff(&ctx);
+               goto retry;
+       }
+
        drm_atomic_state_put(state);
 unlock:
        WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret);
-       drm_modeset_unlock_all(dev);
+       drm_modeset_drop_locks(&ctx);
+       drm_modeset_acquire_fini(&ctx);
 }
 
 static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv,
@@ -842,19 +853,12 @@ static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf,
                return -E2BIG;
        }
 
-       tmpbuf = kmalloc(len + 1, GFP_KERNEL);
-       if (!tmpbuf)
-               return -ENOMEM;
-
-       if (copy_from_user(tmpbuf, ubuf, len)) {
-               ret = -EFAULT;
-               goto out;
-       }
-       tmpbuf[len] = '\0';
+       tmpbuf = memdup_user_nul(ubuf, len);
+       if (IS_ERR(tmpbuf))
+               return PTR_ERR(tmpbuf);
 
        ret = display_crc_ctl_parse(dev_priv, tmpbuf, len);
 
-out:
        kfree(tmpbuf);
        if (ret < 0)
                return ret;
index 570bd603f401d513ac3f08c67fc78d6d1523b762..936eef1634c73fe201300830719c3896dc10291e 100644 (file)
@@ -386,13 +386,53 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl
        return was_enabled;
 }
 
+/**
+ * intel_set_memory_cxsr - Configure CxSR state
+ * @dev_priv: i915 device
+ * @enable: Allow vs. disallow CxSR
+ *
+ * Allow or disallow the system to enter a special CxSR
+ * (C-state self refresh) state. What typically happens in CxSR mode
+ * is that several display FIFOs may get combined into a single larger
+ * FIFO for a particular plane (so called max FIFO mode) to allow the
+ * system to defer memory fetches longer, and the memory will enter
+ * self refresh.
+ *
+ * Note that enabling CxSR does not guarantee that the system enters
+ * this special mode, nor does it guarantee that the system stays
+ * in that mode once entered. So this just allows/disallows the system
+ * to autonomously utilize the CxSR mode. Other factors such as core
+ * C-states will affect when/if the system actually enters/exits the
+ * CxSR mode.
+ *
+ * Note that on VLV/CHV this actually only controls the max FIFO mode,
+ * and the system is free to enter/exit memory self refresh at any time
+ * even when the use of CxSR has been disallowed.
+ *
+ * While the system is actually in the CxSR/max FIFO mode, some plane
+ * control registers will not get latched on vblank. Thus in order to
+ * guarantee the system will respond to changes in the plane registers
+ * we must always disallow CxSR prior to making changes to those registers.
+ * Unfortunately the system will re-evaluate the CxSR conditions at
+ * frame start which happens after vblank start (which is when the plane
+ * registers would get latched), so we can't proceed with the plane update
+ * during the same frame where we disallowed CxSR.
+ *
+ * Certain platforms also have a deeper HPLL SR mode. Fortunately the
+ * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
+ * the hardware w.r.t. HPLL SR when writing to plane registers.
+ * Disallowing just CxSR is sufficient.
+ */
 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
 {
        bool ret;
 
        mutex_lock(&dev_priv->wm.wm_mutex);
        ret = _intel_set_memory_cxsr(dev_priv, enable);
-       dev_priv->wm.vlv.cxsr = enable;
+       if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+               dev_priv->wm.vlv.cxsr = enable;
+       else if (IS_G4X(dev_priv))
+               dev_priv->wm.g4x.cxsr = enable;
        mutex_unlock(&dev_priv->wm.wm_mutex);
 
        return ret;
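A usage sketch following the documentation above (assumed caller, not code from this patch): plane updates bracket their register writes with a CxSR disallow/allow pair so the writes are guaranteed to latch.

	intel_set_memory_cxsr(dev_priv, false);
	/* wait for the next vblank, then write the plane registers ... */
	intel_set_memory_cxsr(dev_priv, true);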
@@ -454,13 +494,6 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
        fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
        fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
        fifo_state->plane[PLANE_CURSOR] = 63;
-
-       DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n",
-                     pipe_name(pipe),
-                     fifo_state->plane[PLANE_PRIMARY],
-                     fifo_state->plane[PLANE_SPRITE0],
-                     fifo_state->plane[PLANE_SPRITE1],
-                     fifo_state->plane[PLANE_CURSOR]);
 }
 
 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
@@ -538,20 +571,6 @@ static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params g4x_wm_info = {
-       .fifo_size = G4X_FIFO_SIZE,
-       .max_wm = G4X_MAX_WM,
-       .default_wm = G4X_MAX_WM,
-       .guard_size = 2,
-       .cacheline_size = G4X_FIFO_LINE_SIZE,
-};
-static const struct intel_watermark_params g4x_cursor_wm_info = {
-       .fifo_size = I965_CURSOR_FIFO,
-       .max_wm = I965_CURSOR_MAX_WM,
-       .default_wm = I965_CURSOR_DFT_WM,
-       .guard_size = 2,
-       .cacheline_size = G4X_FIFO_LINE_SIZE,
-};
 static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
@@ -595,9 +614,105 @@ static const struct intel_watermark_params i845_wm_info = {
        .cacheline_size = I830_FIFO_LINE_SIZE,
 };
 
+/**
+ * intel_wm_method1 - Method 1 / "small buffer" watermark formula
+ * @pixel_rate: Pipe pixel rate in kHz
+ * @cpp: Plane bytes per pixel
+ * @latency: Memory wakeup latency in 0.1us units
+ *
+ * Compute the watermark using the method 1 or "small buffer"
+ * formula. The caller may additionally add extra cachelines
+ * to account for TLB misses and clock crossings.
+ *
+ * This method is concerned with the short term drain rate
+ * of the FIFO, i.e. it does not account for blanking periods
+ * which would effectively reduce the average drain rate across
+ * a longer period. The name "small" refers to the fact that the
+ * FIFO is relatively small compared to the amount of data
+ * fetched.
+ *
+ * The FIFO level vs. time graph might look something like:
+ *
+ *   |\   |\
+ *   | \  | \
+ * __---__---__ (- plane active, _ blanking)
+ * -> time
+ *
+ * or perhaps like this:
+ *
+ *   |\|\  |\|\
+ * __----__----__ (- plane active, _ blanking)
+ * -> time
+ *
+ * Returns:
+ * The watermark in bytes
+ */
+static unsigned int intel_wm_method1(unsigned int pixel_rate,
+                                    unsigned int cpp,
+                                    unsigned int latency)
+{
+       uint64_t ret;
+
+       ret = (uint64_t) pixel_rate * cpp * latency;
+       ret = DIV_ROUND_UP_ULL(ret, 10000);
+
+       return ret;
+}
+
+/**
+ * intel_wm_method2 - Method 2 / "large buffer" watermark formula
+ * @pixel_rate: Pipe pixel rate in kHz
+ * @htotal: Pipe horizontal total
+ * @width: Plane width in pixels
+ * @cpp: Plane bytes per pixel
+ * @latency: Memory wakeup latency in 0.1us units
+ *
+ * Compute the watermark using the method 2 or "large buffer"
+ * formula. The caller may additionally add extra cachelines
+ * to account for TLB misses and clock crossings.
+ *
+ * This method is concerned with the long term drain rate
+ * of the FIFO, i.e. it does account for blanking periods
+ * which effectively reduce the average drain rate across
+ * a longer period. The name "large" refers to the fact that the
+ * FIFO is relatively large compared to the amount of data
+ * fetched.
+ *
+ * The FIFO level vs. time graph might look something like:
+ *
+ *    |\___       |\___
+ *    |    \___   |    \___
+ *    |        \  |        \
+ * __ --__--__--__--__--__--__ (- plane active, _ blanking)
+ * -> time
+ *
+ * Returns:
+ * The watermark in bytes
+ */
+static unsigned int intel_wm_method2(unsigned int pixel_rate,
+                                    unsigned int htotal,
+                                    unsigned int width,
+                                    unsigned int cpp,
+                                    unsigned int latency)
+{
+       unsigned int ret;
+
+       /*
+        * FIXME remove once all users are computing
+        * watermarks in the correct place.
+        */
+       if (WARN_ON_ONCE(htotal == 0))
+               htotal = 1;
+
+       ret = (latency * pixel_rate) / (htotal * 10000);
+       ret = (ret + 1) * width * cpp;
+
+       return ret;
+}
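
A matching sketch of the method 2 arithmetic, again with hypothetical numbers and not part of the patch: with a 148.5 MHz pixel clock, an htotal of 2200 and a 35 us (350 x 0.1 us) latency, two whole scanlines elapse before memory responds, so a 1920-pixel-wide 32bpp plane needs (2 + 1) * 1920 * 4 = 23040 bytes of FIFO.

    /* Illustrative sketch; omits the htotal == 0 guard used above. */
    #include <stdio.h>

    static unsigned int wm_method2(unsigned int pixel_rate_khz,
                                   unsigned int htotal,
                                   unsigned int width,
                                   unsigned int cpp,
                                   unsigned int latency_01us)
    {
            /* whole scanlines elapsed during the latency, then one extra */
            unsigned int lines = (latency_01us * pixel_rate_khz) / (htotal * 10000);

            return (lines + 1) * width * cpp;
    }

    int main(void)
    {
            printf("%u\n", wm_method2(148500, 2200, 1920, 4, 350)); /* prints 23040 */
            return 0;
    }
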
+
 /**
  * intel_calculate_wm - calculate watermark level
- * @clock_in_khz: pixel clock
+ * @pixel_rate: pixel clock
  * @wm: chip FIFO params
  * @cpp: bytes per pixel
  * @latency_ns: memory latency for the platform
@@ -613,12 +728,12 @@ static const struct intel_watermark_params i845_wm_info = {
  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
  * will occur, and a display engine hang could result.
  */
-static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
-                                       const struct intel_watermark_params *wm,
-                                       int fifo_size, int cpp,
-                                       unsigned long latency_ns)
+static unsigned int intel_calculate_wm(int pixel_rate,
+                                      const struct intel_watermark_params *wm,
+                                      int fifo_size, int cpp,
+                                      unsigned int latency_ns)
 {
-       long entries_required, wm_size;
+       int entries, wm_size;
 
        /*
         * Note: we need to make sure we don't overflow for various clock &
@@ -626,18 +741,17 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
         * clocks go from a few thousand to several hundred thousand.
         * latency is usually a few thousand
         */
-       entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) /
-               1000;
-       entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
+       entries = intel_wm_method1(pixel_rate, cpp,
+                                  latency_ns / 100);
+       entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
+               wm->guard_size;
+       DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
 
-       DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
-
-       wm_size = fifo_size - (entries_required + wm->guard_size);
-
-       DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
+       wm_size = fifo_size - entries;
+       DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
 
        /* Don't promote wm_size to unsigned... */
-       if (wm_size > (long)wm->max_wm)
+       if (wm_size > wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;
@@ -655,6 +769,21 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
        return wm_size;
 }
 
+static bool is_disabling(int old, int new, int threshold)
+{
+       return old >= threshold && new < threshold;
+}
+
+static bool is_enabling(int old, int new, int threshold)
+{
+       return old < threshold && new >= threshold;
+}
+
+static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
+{
+       return dev_priv->wm.max_level + 1;
+}
+
 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
                                   const struct intel_plane_state *plane_state)
 {
@@ -699,7 +828,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
        struct intel_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
-       unsigned long wm;
+       unsigned int wm;
 
        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
                                         dev_priv->is_ddr3,
@@ -733,7 +862,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
-                                       cpp, latency->cursor_sr);
+                                       4, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= FW_WM(wm, CURSOR_SR);
@@ -751,7 +880,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
-                                       cpp, latency->cursor_hpll_disable);
+                                       4, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= FW_WM(wm, HPLL_CURSOR);
@@ -764,144 +893,50 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc)
        }
 }
 
-static bool g4x_compute_wm0(struct drm_i915_private *dev_priv,
-                           int plane,
-                           const struct intel_watermark_params *display,
-                           int display_latency_ns,
-                           const struct intel_watermark_params *cursor,
-                           int cursor_latency_ns,
-                           int *plane_wm,
-                           int *cursor_wm)
-{
-       struct intel_crtc *crtc;
-       const struct drm_display_mode *adjusted_mode;
-       const struct drm_framebuffer *fb;
-       int htotal, hdisplay, clock, cpp;
-       int line_time_us, line_count;
-       int entries, tlb_miss;
-
-       crtc = intel_get_crtc_for_plane(dev_priv, plane);
-       if (!intel_crtc_active(crtc)) {
-               *cursor_wm = cursor->guard_size;
-               *plane_wm = display->guard_size;
-               return false;
-       }
-
-       adjusted_mode = &crtc->config->base.adjusted_mode;
-       fb = crtc->base.primary->state->fb;
-       clock = adjusted_mode->crtc_clock;
-       htotal = adjusted_mode->crtc_htotal;
-       hdisplay = crtc->config->pipe_src_w;
-       cpp = fb->format->cpp[0];
-
-       /* Use the small buffer method to calculate plane watermark */
-       entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
-       tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
-       if (tlb_miss > 0)
-               entries += tlb_miss;
-       entries = DIV_ROUND_UP(entries, display->cacheline_size);
-       *plane_wm = entries + display->guard_size;
-       if (*plane_wm > (int)display->max_wm)
-               *plane_wm = display->max_wm;
-
-       /* Use the large buffer method to calculate cursor watermark */
-       line_time_us = max(htotal * 1000 / clock, 1);
-       line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
-       entries = line_count * crtc->base.cursor->state->crtc_w * cpp;
-       tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
-       if (tlb_miss > 0)
-               entries += tlb_miss;
-       entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
-       *cursor_wm = entries + cursor->guard_size;
-       if (*cursor_wm > (int)cursor->max_wm)
-               *cursor_wm = (int)cursor->max_wm;
-
-       return true;
-}
-
 /*
- * Check the wm result.
- *
- * If any calculated watermark values is larger than the maximum value that
- * can be programmed into the associated watermark register, that watermark
- * must be disabled.
+ * Documentation says:
+ * "If the line size is small, the TLB fetches can get in the way of the
+ *  data fetches, causing some lag in the pixel data return which is not
+ *  accounted for in the above formulas. The following adjustment only
+ *  needs to be applied if eight whole lines fit in the buffer at once.
+ *  The WM is adjusted upwards by the difference between the FIFO size
+ *  and the size of 8 whole lines. This adjustment is always performed
+ *  in the actual pixel depth regardless of whether FBC is enabled or not."
  */
-static bool g4x_check_srwm(struct drm_i915_private *dev_priv,
-                          int display_wm, int cursor_wm,
-                          const struct intel_watermark_params *display,
-                          const struct intel_watermark_params *cursor)
+static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
 {
-       DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
-                     display_wm, cursor_wm);
-
-       if (display_wm > display->max_wm) {
-               DRM_DEBUG_KMS("display watermark is too large(%d/%u), disabling\n",
-                             display_wm, display->max_wm);
-               return false;
-       }
-
-       if (cursor_wm > cursor->max_wm) {
-               DRM_DEBUG_KMS("cursor watermark is too large(%d/%u), disabling\n",
-                             cursor_wm, cursor->max_wm);
-               return false;
-       }
-
-       if (!(display_wm || cursor_wm)) {
-               DRM_DEBUG_KMS("SR latency is 0, disabling\n");
-               return false;
-       }
+       int tlb_miss = fifo_size * 64 - width * cpp * 8;
 
-       return true;
+       return max(0, tlb_miss);
 }
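
To make the adjustment concrete (hypothetical numbers, not taken from the patch): a 511-cacheline FIFO holds 511 * 64 = 32704 bytes. For a 640-pixel-wide 32bpp plane, eight whole lines are 640 * 4 * 8 = 20480 bytes, which fits, so the watermark is padded by 32704 - 20480 = 12224 bytes; for a 1920-pixel-wide plane, eight lines are 61440 bytes, which does not fit, and max(0, ...) leaves the watermark untouched.
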
 
-static bool g4x_compute_srwm(struct drm_i915_private *dev_priv,
-                            int plane,
-                            int latency_ns,
-                            const struct intel_watermark_params *display,
-                            const struct intel_watermark_params *cursor,
-                            int *display_wm, int *cursor_wm)
+static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
+                               const struct g4x_wm_values *wm)
 {
-       struct intel_crtc *crtc;
-       const struct drm_display_mode *adjusted_mode;
-       const struct drm_framebuffer *fb;
-       int hdisplay, htotal, cpp, clock;
-       unsigned long line_time_us;
-       int line_count, line_size;
-       int small, large;
-       int entries;
-
-       if (!latency_ns) {
-               *display_wm = *cursor_wm = 0;
-               return false;
-       }
-
-       crtc = intel_get_crtc_for_plane(dev_priv, plane);
-       adjusted_mode = &crtc->config->base.adjusted_mode;
-       fb = crtc->base.primary->state->fb;
-       clock = adjusted_mode->crtc_clock;
-       htotal = adjusted_mode->crtc_htotal;
-       hdisplay = crtc->config->pipe_src_w;
-       cpp = fb->format->cpp[0];
-
-       line_time_us = max(htotal * 1000 / clock, 1);
-       line_count = (latency_ns / line_time_us + 1000) / 1000;
-       line_size = hdisplay * cpp;
-
-       /* Use the minimum of the small and large buffer method for primary */
-       small = ((clock * cpp / 1000) * latency_ns) / 1000;
-       large = line_count * line_size;
+       enum pipe pipe;
 
-       entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
-       *display_wm = entries + display->guard_size;
+       for_each_pipe(dev_priv, pipe)
+               trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
 
-       /* calculate the self-refresh watermark for display cursor */
-       entries = line_count * cpp * crtc->base.cursor->state->crtc_w;
-       entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
-       *cursor_wm = entries + cursor->guard_size;
+       I915_WRITE(DSPFW1,
+                  FW_WM(wm->sr.plane, SR) |
+                  FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
+                  FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
+                  FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
+       I915_WRITE(DSPFW2,
+                  (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
+                  FW_WM(wm->sr.fbc, FBC_SR) |
+                  FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
+                  FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
+                  FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
+                  FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
+       I915_WRITE(DSPFW3,
+                  (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
+                  FW_WM(wm->sr.cursor, CURSOR_SR) |
+                  FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
+                  FW_WM(wm->hpll.plane, HPLL_SR));
 
-       return g4x_check_srwm(dev_priv,
-                             *display_wm, *cursor_wm,
-                             display, cursor);
+       POSTING_READ(DSPFW1);
 }
 
 #define FW_WM_VLV(value, plane) \
@@ -980,22 +1015,540 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
        }
 
-       POSTING_READ(DSPFW1);
+       POSTING_READ(DSPFW1);
+}
+
+#undef FW_WM_VLV
+
+static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
+{
+       /* all latencies in usec */
+       dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
+       dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
+       dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
+
+       dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
+}
+
+static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
+{
+       /*
+        * DSPCNTR[13] supposedly controls whether the
+        * primary plane can use the FIFO space otherwise
+        * reserved for the sprite plane. It's not 100% clear
+        * what the actual FIFO size is, but it looks like we
+        * can happily set both primary and sprite watermarks
+        * up to 127 cachelines. So that would seem to mean
+        * that either DSPCNTR[13] doesn't do anything, or that
+        * the total FIFO is >= 256 cachelines in size. Either
+        * way, we don't seem to have to worry about this
+        * repartitioning as the maximum watermark value the
+        * register can hold for each plane is lower than the
+        * minimum FIFO size.
+        */
+       switch (plane_id) {
+       case PLANE_CURSOR:
+               return 63;
+       case PLANE_PRIMARY:
+               return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
+       case PLANE_SPRITE0:
+               return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
+       default:
+               MISSING_CASE(plane_id);
+               return 0;
+       }
+}
+
+static int g4x_fbc_fifo_size(int level)
+{
+       switch (level) {
+       case G4X_WM_LEVEL_SR:
+               return 7;
+       case G4X_WM_LEVEL_HPLL:
+               return 15;
+       default:
+               MISSING_CASE(level);
+               return 0;
+       }
+}
+
+static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
+                              const struct intel_plane_state *plane_state,
+                              int level)
+{
+       struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       const struct drm_display_mode *adjusted_mode =
+               &crtc_state->base.adjusted_mode;
+       int clock, htotal, cpp, width, wm;
+       int latency = dev_priv->wm.pri_latency[level] * 10;
+
+       if (latency == 0)
+               return USHRT_MAX;
+
+       if (!intel_wm_plane_visible(crtc_state, plane_state))
+               return 0;
+
+       /*
+        * Not 100% sure which way ELK should go here as the
+        * spec only says CL/CTG should assume 32bpp and BW
+        * doesn't need to. But as these things followed the
+        * mobile vs. desktop lines on gen3 as well, let's
+        * assume ELK doesn't need this.
+        *
+        * The spec also fails to list such a restriction for
+        * the HPLL watermark, which seems a little strange.
+        * Let's use 32bpp for the HPLL watermark as well.
+        */
+       if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
+           level != G4X_WM_LEVEL_NORMAL)
+               cpp = 4;
+       else
+               cpp = plane_state->base.fb->format->cpp[0];
+
+       clock = adjusted_mode->crtc_clock;
+       htotal = adjusted_mode->crtc_htotal;
+
+       if (plane->id == PLANE_CURSOR)
+               width = plane_state->base.crtc_w;
+       else
+               width = drm_rect_width(&plane_state->base.dst);
+
+       if (plane->id == PLANE_CURSOR) {
+               wm = intel_wm_method2(clock, htotal, width, cpp, latency);
+       } else if (plane->id == PLANE_PRIMARY &&
+                  level == G4X_WM_LEVEL_NORMAL) {
+               wm = intel_wm_method1(clock, cpp, latency);
+       } else {
+               int small, large;
+
+               small = intel_wm_method1(clock, cpp, latency);
+               large = intel_wm_method2(clock, htotal, width, cpp, latency);
+
+               wm = min(small, large);
+       }
+
+       wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
+                             width, cpp);
+
+       wm = DIV_ROUND_UP(wm, 64) + 2;
+
+       return min_t(int, wm, USHRT_MAX);
+}
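
Continuing the hypothetical 1920-wide, 32bpp, 35 us example from above for a primary plane at a non-NORMAL level: method 1 gives 148500 * 4 * 350 / 10000 = 20790 bytes, method 2 gives 23040 bytes, and the minimum is 20790. The TLB workaround adds nothing here (eight whole lines exceed the 511-cacheline FIFO), so the result is DIV_ROUND_UP(20790, 64) + 2 = 327 cachelines, comfortably below the 511-entry limit for the HPLL level.
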
+
+static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
+                                int level, enum plane_id plane_id, u16 value)
+{
+       struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+       bool dirty = false;
+
+       for (; level < intel_wm_num_levels(dev_priv); level++) {
+               struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
+
+               dirty |= raw->plane[plane_id] != value;
+               raw->plane[plane_id] = value;
+       }
+
+       return dirty;
+}
+
+static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
+                              int level, u16 value)
+{
+       struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+       bool dirty = false;
+
+       /* NORMAL level doesn't have an FBC watermark */
+       level = max(level, G4X_WM_LEVEL_SR);
+
+       for (; level < intel_wm_num_levels(dev_priv); level++) {
+               struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
+
+               dirty |= raw->fbc != value;
+               raw->fbc = value;
+       }
+
+       return dirty;
+}
+
+static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
+                                  const struct intel_plane_state *pstate,
+                                  uint32_t pri_val);
+
+static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
+                                    const struct intel_plane_state *plane_state)
+{
+       struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+       int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
+       enum plane_id plane_id = plane->id;
+       bool dirty = false;
+       int level;
+
+       if (!intel_wm_plane_visible(crtc_state, plane_state)) {
+               dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
+               if (plane_id == PLANE_PRIMARY)
+                       dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
+               goto out;
+       }
+
+       for (level = 0; level < num_levels; level++) {
+               struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
+               int wm, max_wm;
+
+               wm = g4x_compute_wm(crtc_state, plane_state, level);
+               max_wm = g4x_plane_fifo_size(plane_id, level);
+
+               if (wm > max_wm)
+                       break;
+
+               dirty |= raw->plane[plane_id] != wm;
+               raw->plane[plane_id] = wm;
+
+               if (plane_id != PLANE_PRIMARY ||
+                   level == G4X_WM_LEVEL_NORMAL)
+                       continue;
+
+               wm = ilk_compute_fbc_wm(crtc_state, plane_state,
+                                       raw->plane[plane_id]);
+               max_wm = g4x_fbc_fifo_size(level);
+
+               /*
+                * FBC wm is not mandatory as we
+                * can always just disable its use.
+                */
+               if (wm > max_wm)
+                       wm = USHRT_MAX;
+
+               dirty |= raw->fbc != wm;
+               raw->fbc = wm;
+       }
+
+       /* mark watermarks as invalid */
+       dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
+
+       if (plane_id == PLANE_PRIMARY)
+               dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
+
+ out:
+       if (dirty) {
+               DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
+                             plane->base.name,
+                             crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
+                             crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
+                             crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
+
+               if (plane_id == PLANE_PRIMARY)
+                       DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
+                                     crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
+                                     crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
+       }
+
+       return dirty;
+}
+
+static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
+                                     enum plane_id plane_id, int level)
+{
+       const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
+
+       return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
+}
+
+static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
+                                    int level)
+{
+       struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+
+       if (level > dev_priv->wm.max_level)
+               return false;
+
+       return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
+               g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
+               g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
+}
+
+/* mark all levels starting from 'level' as invalid */
+static void g4x_invalidate_wms(struct intel_crtc *crtc,
+                              struct g4x_wm_state *wm_state, int level)
+{
+       if (level <= G4X_WM_LEVEL_NORMAL) {
+               enum plane_id plane_id;
+
+               for_each_plane_id_on_crtc(crtc, plane_id)
+                       wm_state->wm.plane[plane_id] = USHRT_MAX;
+       }
+
+       if (level <= G4X_WM_LEVEL_SR) {
+               wm_state->cxsr = false;
+               wm_state->sr.cursor = USHRT_MAX;
+               wm_state->sr.plane = USHRT_MAX;
+               wm_state->sr.fbc = USHRT_MAX;
+       }
+
+       if (level <= G4X_WM_LEVEL_HPLL) {
+               wm_state->hpll_en = false;
+               wm_state->hpll.cursor = USHRT_MAX;
+               wm_state->hpll.plane = USHRT_MAX;
+               wm_state->hpll.fbc = USHRT_MAX;
+       }
+}
+
+static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
+{
+       struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+       struct intel_atomic_state *state =
+               to_intel_atomic_state(crtc_state->base.state);
+       struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
+       int num_active_planes = hweight32(crtc_state->active_planes &
+                                         ~BIT(PLANE_CURSOR));
+       const struct g4x_pipe_wm *raw;
+       struct intel_plane_state *plane_state;
+       struct intel_plane *plane;
+       enum plane_id plane_id;
+       int i, level;
+       unsigned int dirty = 0;
+
+       for_each_intel_plane_in_state(state, plane, plane_state, i) {
+               const struct intel_plane_state *old_plane_state =
+                       to_intel_plane_state(plane->base.state);
+
+               if (plane_state->base.crtc != &crtc->base &&
+                   old_plane_state->base.crtc != &crtc->base)
+                       continue;
+
+               if (g4x_raw_plane_wm_compute(crtc_state, plane_state))
+                       dirty |= BIT(plane->id);
+       }
+
+       if (!dirty)
+               return 0;
+
+       level = G4X_WM_LEVEL_NORMAL;
+       if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
+               goto out;
+
+       raw = &crtc_state->wm.g4x.raw[level];
+       for_each_plane_id_on_crtc(crtc, plane_id)
+               wm_state->wm.plane[plane_id] = raw->plane[plane_id];
+
+       level = G4X_WM_LEVEL_SR;
+
+       if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
+               goto out;
+
+       raw = &crtc_state->wm.g4x.raw[level];
+       wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
+       wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
+       wm_state->sr.fbc = raw->fbc;
+
+       wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
+
+       level = G4X_WM_LEVEL_HPLL;
+
+       if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
+               goto out;
+
+       raw = &crtc_state->wm.g4x.raw[level];
+       wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
+       wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
+       wm_state->hpll.fbc = raw->fbc;
+
+       wm_state->hpll_en = wm_state->cxsr;
+
+       level++;
+
+ out:
+       if (level == G4X_WM_LEVEL_NORMAL)
+               return -EINVAL;
+
+       /* invalidate the higher levels */
+       g4x_invalidate_wms(crtc, wm_state, level);
+
+       /*
+        * Determine if the FBC watermark(s) can be used. If
+        * this isn't the case we prefer to disable the FBC
+        * watermark(s) rather than disable the SR/HPLL
+        * level(s) entirely.
+        */
+       wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
+
+       if (level >= G4X_WM_LEVEL_SR &&
+           wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
+               wm_state->fbc_en = false;
+       else if (level >= G4X_WM_LEVEL_HPLL &&
+                wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
+               wm_state->fbc_en = false;
+
+       return 0;
+}
+
+static int g4x_compute_intermediate_wm(struct drm_device *dev,
+                                      struct intel_crtc *crtc,
+                                      struct intel_crtc_state *crtc_state)
+{
+       struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate;
+       const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal;
+       const struct g4x_wm_state *active = &crtc->wm.active.g4x;
+       enum plane_id plane_id;
+
+       intermediate->cxsr = optimal->cxsr && active->cxsr &&
+               !crtc_state->disable_cxsr;
+       intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
+               !crtc_state->disable_cxsr;
+       intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
+
+       for_each_plane_id_on_crtc(crtc, plane_id) {
+               intermediate->wm.plane[plane_id] =
+                       max(optimal->wm.plane[plane_id],
+                           active->wm.plane[plane_id]);
+
+               WARN_ON(intermediate->wm.plane[plane_id] >
+                       g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
+       }
+
+       intermediate->sr.plane = max(optimal->sr.plane,
+                                    active->sr.plane);
+       intermediate->sr.cursor = max(optimal->sr.cursor,
+                                     active->sr.cursor);
+       intermediate->sr.fbc = max(optimal->sr.fbc,
+                                  active->sr.fbc);
+
+       intermediate->hpll.plane = max(optimal->hpll.plane,
+                                      active->hpll.plane);
+       intermediate->hpll.cursor = max(optimal->hpll.cursor,
+                                       active->hpll.cursor);
+       intermediate->hpll.fbc = max(optimal->hpll.fbc,
+                                    active->hpll.fbc);
+
+       WARN_ON((intermediate->sr.plane >
+                g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
+                intermediate->sr.cursor >
+                g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
+               intermediate->cxsr);
+       WARN_ON((intermediate->sr.plane >
+                g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
+                intermediate->sr.cursor >
+                g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
+               intermediate->hpll_en);
+
+       WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
+               intermediate->fbc_en && intermediate->cxsr);
+       WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
+               intermediate->fbc_en && intermediate->hpll_en);
+
+       /*
+        * If our intermediate WMs are identical to the final WMs, then we can
+        * omit the post-vblank programming; only update if it's different.
+        */
+       if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
+               crtc_state->wm.need_postvbl_update = true;
+
+       return 0;
+}
+
+static void g4x_merge_wm(struct drm_i915_private *dev_priv,
+                        struct g4x_wm_values *wm)
+{
+       struct intel_crtc *crtc;
+       int num_active_crtcs = 0;
+
+       wm->cxsr = true;
+       wm->hpll_en = true;
+       wm->fbc_en = true;
+
+       for_each_intel_crtc(&dev_priv->drm, crtc) {
+               const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
+
+               if (!crtc->active)
+                       continue;
+
+               if (!wm_state->cxsr)
+                       wm->cxsr = false;
+               if (!wm_state->hpll_en)
+                       wm->hpll_en = false;
+               if (!wm_state->fbc_en)
+                       wm->fbc_en = false;
+
+               num_active_crtcs++;
+       }
+
+       if (num_active_crtcs != 1) {
+               wm->cxsr = false;
+               wm->hpll_en = false;
+               wm->fbc_en = false;
+       }
+
+       for_each_intel_crtc(&dev_priv->drm, crtc) {
+               const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
+               enum pipe pipe = crtc->pipe;
+
+               wm->pipe[pipe] = wm_state->wm;
+               if (crtc->active && wm->cxsr)
+                       wm->sr = wm_state->sr;
+               if (crtc->active && wm->hpll_en)
+                       wm->hpll = wm_state->hpll;
+       }
 }
 
-#undef FW_WM_VLV
+static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
+{
+       struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
+       struct g4x_wm_values new_wm = {};
+
+       g4x_merge_wm(dev_priv, &new_wm);
+
+       if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
+               return;
+
+       if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
+               _intel_set_memory_cxsr(dev_priv, false);
+
+       g4x_write_wm_values(dev_priv, &new_wm);
+
+       if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
+               _intel_set_memory_cxsr(dev_priv, true);
+
+       *old_wm = new_wm;
+}
+
+static void g4x_initial_watermarks(struct intel_atomic_state *state,
+                                  struct intel_crtc_state *crtc_state)
+{
+       struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+       struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+
+       mutex_lock(&dev_priv->wm.wm_mutex);
+       crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
+       g4x_program_watermarks(dev_priv);
+       mutex_unlock(&dev_priv->wm.wm_mutex);
+}
+
+static void g4x_optimize_watermarks(struct intel_atomic_state *state,
+                                   struct intel_crtc_state *crtc_state)
+{
+       struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+
+       if (!crtc_state->wm.need_postvbl_update)
+               return;
+
+       mutex_lock(&dev_priv->wm.wm_mutex);
+       intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
+       g4x_program_watermarks(dev_priv);
+       mutex_unlock(&dev_priv->wm.wm_mutex);
+}
 
 /* latency must be in 0.1us units. */
 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
-                                  unsigned int pipe_htotal,
-                                  unsigned int horiz_pixels,
+                                  unsigned int htotal,
+                                  unsigned int width,
                                   unsigned int cpp,
                                   unsigned int latency)
 {
        unsigned int ret;
 
-       ret = (latency * pixel_rate) / (pipe_htotal * 10000);
-       ret = (ret + 1) * horiz_pixels * cpp;
+       ret = intel_wm_method2(pixel_rate, htotal,
+                              width, cpp, latency);
        ret = DIV_ROUND_UP(ret, 64);
 
        return ret;
@@ -1029,17 +1582,15 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
        if (dev_priv->wm.pri_latency[level] == 0)
                return USHRT_MAX;
 
-       if (!plane_state->base.visible)
+       if (!intel_wm_plane_visible(crtc_state, plane_state))
                return 0;
 
        cpp = plane_state->base.fb->format->cpp[0];
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        width = crtc_state->pipe_src_w;
-       if (WARN_ON(htotal == 0))
-               htotal = 1;
 
-       if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
+       if (plane->id == PLANE_CURSOR) {
                /*
                 * FIXME the formula gives values that are
                 * too big for the cursor FIFO, and hence we
@@ -1064,7 +1615,7 @@ static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
 {
        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-       const struct vlv_pipe_wm *raw =
+       const struct g4x_pipe_wm *raw =
                &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
        struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
        unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
@@ -1143,18 +1694,13 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
        return 0;
 }
 
-static int vlv_num_wm_levels(struct drm_i915_private *dev_priv)
-{
-       return dev_priv->wm.max_level + 1;
-}
-
 /* mark all levels starting from 'level' as invalid */
 static void vlv_invalidate_wms(struct intel_crtc *crtc,
                               struct vlv_wm_state *wm_state, int level)
 {
        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-       for (; level < vlv_num_wm_levels(dev_priv); level++) {
+       for (; level < intel_wm_num_levels(dev_priv); level++) {
                enum plane_id plane_id;
 
                for_each_plane_id_on_crtc(crtc, plane_id)
@@ -1181,11 +1727,11 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
                                 int level, enum plane_id plane_id, u16 value)
 {
        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
-       int num_levels = vlv_num_wm_levels(dev_priv);
+       int num_levels = intel_wm_num_levels(dev_priv);
        bool dirty = false;
 
        for (; level < num_levels; level++) {
-               struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
+               struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
 
                dirty |= raw->plane[plane_id] != value;
                raw->plane[plane_id] = value;
@@ -1194,22 +1740,22 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
        return dirty;
 }
 
-static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state,
-                                const struct intel_plane_state *plane_state)
+static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
+                                    const struct intel_plane_state *plane_state)
 {
        struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
        enum plane_id plane_id = plane->id;
-       int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev));
+       int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
        int level;
        bool dirty = false;
 
-       if (!plane_state->base.visible) {
+       if (!intel_wm_plane_visible(crtc_state, plane_state)) {
                dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
                goto out;
        }
 
        for (level = 0; level < num_levels; level++) {
-               struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
+               struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
                int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
                int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
 
@@ -1225,7 +1771,7 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state,
 
 out:
        if (dirty)
-               DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n",
+               DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
                              plane->base.name,
                              crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
                              crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
@@ -1234,10 +1780,10 @@ out:
        return dirty;
 }
 
-static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
-                                 enum plane_id plane_id, int level)
+static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
+                                     enum plane_id plane_id, int level)
 {
-       const struct vlv_pipe_wm *raw =
+       const struct g4x_pipe_wm *raw =
                &crtc_state->wm.vlv.raw[level];
        const struct vlv_fifo_state *fifo_state =
                &crtc_state->wm.vlv.fifo_state;
@@ -1245,12 +1791,12 @@ static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
        return raw->plane[plane_id] <= fifo_state->plane[plane_id];
 }
 
-static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
+static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
 {
-       return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
-               vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
-               vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
-               vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
+       return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
+               vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
+               vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
+               vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
 }
 
 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
@@ -1279,7 +1825,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
                    old_plane_state->base.crtc != &crtc->base)
                        continue;
 
-               if (vlv_plane_wm_compute(crtc_state, plane_state))
+               if (vlv_raw_plane_wm_compute(crtc_state, plane_state))
                        dirty |= BIT(plane->id);
        }
 
@@ -1313,7 +1859,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
        }
 
        /* initially allow all levels */
-       wm_state->num_levels = vlv_num_wm_levels(dev_priv);
+       wm_state->num_levels = intel_wm_num_levels(dev_priv);
        /*
         * Note that enabling cxsr with no primary/sprite planes
         * enabled can wedge the pipe. Hence we only allow cxsr
@@ -1322,10 +1868,10 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
        wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
 
        for (level = 0; level < wm_state->num_levels; level++) {
-               const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
+               const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
                const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
 
-               if (!vlv_crtc_wm_is_valid(crtc_state, level))
+               if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
                        break;
 
                for_each_plane_id_on_crtc(crtc, plane_id) {
@@ -1539,16 +2085,6 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv,
        }
 }
 
-static bool is_disabling(int old, int new, int threshold)
-{
-       return old >= threshold && new < threshold;
-}
-
-static bool is_enabling(int old, int new, int threshold)
-{
-       return old < threshold && new >= threshold;
-}
-
 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
 {
        struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
@@ -1609,65 +2145,6 @@ static void vlv_optimize_watermarks(struct intel_atomic_state *state,
        mutex_unlock(&dev_priv->wm.wm_mutex);
 }
 
-#define single_plane_enabled(mask) is_power_of_2(mask)
-
-static void g4x_update_wm(struct intel_crtc *crtc)
-{
-       struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-       static const int sr_latency_ns = 12000;
-       int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
-       int plane_sr, cursor_sr;
-       unsigned int enabled = 0;
-       bool cxsr_enabled;
-
-       if (g4x_compute_wm0(dev_priv, PIPE_A,
-                           &g4x_wm_info, pessimal_latency_ns,
-                           &g4x_cursor_wm_info, pessimal_latency_ns,
-                           &planea_wm, &cursora_wm))
-               enabled |= 1 << PIPE_A;
-
-       if (g4x_compute_wm0(dev_priv, PIPE_B,
-                           &g4x_wm_info, pessimal_latency_ns,
-                           &g4x_cursor_wm_info, pessimal_latency_ns,
-                           &planeb_wm, &cursorb_wm))
-               enabled |= 1 << PIPE_B;
-
-       if (single_plane_enabled(enabled) &&
-           g4x_compute_srwm(dev_priv, ffs(enabled) - 1,
-                            sr_latency_ns,
-                            &g4x_wm_info,
-                            &g4x_cursor_wm_info,
-                            &plane_sr, &cursor_sr)) {
-               cxsr_enabled = true;
-       } else {
-               cxsr_enabled = false;
-               intel_set_memory_cxsr(dev_priv, false);
-               plane_sr = cursor_sr = 0;
-       }
-
-       DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
-                     "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
-                     planea_wm, cursora_wm,
-                     planeb_wm, cursorb_wm,
-                     plane_sr, cursor_sr);
-
-       I915_WRITE(DSPFW1,
-                  FW_WM(plane_sr, SR) |
-                  FW_WM(cursorb_wm, CURSORB) |
-                  FW_WM(planeb_wm, PLANEB) |
-                  FW_WM(planea_wm, PLANEA));
-       I915_WRITE(DSPFW2,
-                  (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
-                  FW_WM(cursora_wm, CURSORA));
-       /* HPLL off in SR has some issues on G4x... disable it */
-       I915_WRITE(DSPFW3,
-                  (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
-                  FW_WM(cursor_sr, CURSOR_SR));
-
-       if (cxsr_enabled)
-               intel_set_memory_cxsr(dev_priv, true);
-}
-
 static void i965_update_wm(struct intel_crtc *unused_crtc)
 {
        struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
@@ -1689,14 +2166,10 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = crtc->config->pipe_src_w;
                int cpp = fb->format->cpp[0];
-               unsigned long line_time_us;
                int entries;
 
-               line_time_us = max(htotal * 1000 / clock, 1);
-
-               /* Use ns/us then divide to preserve precision */
-               entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
-                       cpp * hdisplay;
+               entries = intel_wm_method2(clock, htotal,
+                                          hdisplay, cpp, sr_latency_ns / 100);
                entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
                srwm = I965_FIFO_SIZE - entries;
                if (srwm < 0)
@@ -1705,13 +2178,14 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
                DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
                              entries, srwm);
 
-               entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
-                       cpp * crtc->base.cursor->state->crtc_w;
+               entries = intel_wm_method2(clock, htotal,
+                                          crtc->base.cursor->state->crtc_w, 4,
+                                          sr_latency_ns / 100);
                entries = DIV_ROUND_UP(entries,
-                                         i965_cursor_wm_info.cacheline_size);
-               cursor_sr = i965_cursor_wm_info.fifo_size -
-                       (entries + i965_cursor_wm_info.guard_size);
+                                      i965_cursor_wm_info.cacheline_size) +
+                       i965_cursor_wm_info.guard_size;
 
+               cursor_sr = i965_cursor_wm_info.fifo_size - entries;
                if (cursor_sr > i965_cursor_wm_info.max_wm)
                        cursor_sr = i965_cursor_wm_info.max_wm;
 
@@ -1848,7 +2322,6 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = enabled->config->pipe_src_w;
                int cpp;
-               unsigned long line_time_us;
                int entries;
 
                if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
@@ -1856,11 +2329,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
                else
                        cpp = fb->format->cpp[0];
 
-               line_time_us = max(htotal * 1000 / clock, 1);
-
-               /* Use ns/us then divide to preserve precision */
-               entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
-                       cpp * hdisplay;
+               entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
+                                          sr_latency_ns / 100);
                entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
                DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
                srwm = wm_info->fifo_size - entries;
@@ -1917,34 +2387,31 @@ static void i845_update_wm(struct intel_crtc *unused_crtc)
 }
 
 /* latency must be in 0.1us units. */
-static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
+static unsigned int ilk_wm_method1(unsigned int pixel_rate,
+                                  unsigned int cpp,
+                                  unsigned int latency)
 {
-       uint64_t ret;
-
-       if (WARN(latency == 0, "Latency value missing\n"))
-               return UINT_MAX;
+       unsigned int ret;
 
-       ret = (uint64_t) pixel_rate * cpp * latency;
-       ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
+       ret = intel_wm_method1(pixel_rate, cpp, latency);
+       ret = DIV_ROUND_UP(ret, 64) + 2;
 
        return ret;
 }
 
 /* latency must be in 0.1us units. */
-static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
-                              uint32_t horiz_pixels, uint8_t cpp,
-                              uint32_t latency)
+static unsigned int ilk_wm_method2(unsigned int pixel_rate,
+                                  unsigned int htotal,
+                                  unsigned int width,
+                                  unsigned int cpp,
+                                  unsigned int latency)
 {
-       uint32_t ret;
-
-       if (WARN(latency == 0, "Latency value missing\n"))
-               return UINT_MAX;
-       if (WARN_ON(!pipe_htotal))
-               return UINT_MAX;
+       unsigned int ret;
 
-       ret = (latency * pixel_rate) / (pipe_htotal * 10000);
-       ret = (ret + 1) * horiz_pixels * cpp;
+       ret = intel_wm_method2(pixel_rate, htotal,
+                              width, cpp, latency);
        ret = DIV_ROUND_UP(ret, 64) + 2;
+
        return ret;
 }
 
@@ -3360,26 +3827,27 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
  * Return value is provided in 16.16 fixed point form to retain fractional part.
  * Caller should take care of dividing & rounding off the value.
  */
-static uint32_t
+static uint_fixed_16_16_t
 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
                           const struct intel_plane_state *pstate)
 {
        struct intel_plane *plane = to_intel_plane(pstate->base.plane);
-       uint32_t downscale_h, downscale_w;
        uint32_t src_w, src_h, dst_w, dst_h;
+       uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
+       uint_fixed_16_16_t downscale_h, downscale_w;
 
        if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
-               return DRM_PLANE_HELPER_NO_SCALING;
+               return u32_to_fixed_16_16(0);
 
        /* n.b., src is 16.16 fixed point, dst is whole integer */
        if (plane->id == PLANE_CURSOR) {
-               src_w = pstate->base.src_w;
-               src_h = pstate->base.src_h;
+               src_w = pstate->base.src_w >> 16;
+               src_h = pstate->base.src_h >> 16;
                dst_w = pstate->base.crtc_w;
                dst_h = pstate->base.crtc_h;
        } else {
-               src_w = drm_rect_width(&pstate->base.src);
-               src_h = drm_rect_height(&pstate->base.src);
+               src_w = drm_rect_width(&pstate->base.src) >> 16;
+               src_h = drm_rect_height(&pstate->base.src) >> 16;
                dst_w = drm_rect_width(&pstate->base.dst);
                dst_h = drm_rect_height(&pstate->base.dst);
        }
@@ -3387,11 +3855,12 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
        if (drm_rotation_90_or_270(pstate->base.rotation))
                swap(dst_w, dst_h);
 
-       downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
-       downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
+       fp_w_ratio = fixed_16_16_div(src_w, dst_w);
+       fp_h_ratio = fixed_16_16_div(src_h, dst_h);
+       downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1));
+       downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1));
 
-       /* Provide result in 16.16 fixed point */
-       return (uint64_t)downscale_w * downscale_h >> 16;
+       return mul_fixed16(downscale_w, downscale_h);
 }
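
With this change the downscale ratios are carried in 16.16 fixed point end to end instead of being collapsed back to an integer. A minimal userspace sketch of 16.16 arithmetic (hypothetical helper names, not the driver's uint_fixed_16_16_t API):

    #include <stdint.h>
    #include <stdio.h>

    /* divide two integers, yielding a 16.16 fixed-point ratio */
    static uint32_t fp_div(uint32_t num, uint32_t den)
    {
            return (uint32_t)(((uint64_t)num << 16) / den);
    }

    /* multiply two 16.16 values, yielding a 16.16 product */
    static uint32_t fp_mul(uint32_t a, uint32_t b)
    {
            return (uint32_t)(((uint64_t)a * b) >> 16);
    }

    int main(void)
    {
            /* 3840x2160 source scanned out on a 1920x1080 plane: 2.0 x 2.0 */
            uint32_t w_ratio = fp_div(3840, 1920);       /* 0x20000 == 2.0 */
            uint32_t h_ratio = fp_div(2160, 1080);       /* 0x20000 == 2.0 */
            uint32_t amount  = fp_mul(w_ratio, h_ratio); /* 0x40000 == 4.0 */

            printf("downscale amount = %u.%04u\n",
                   (unsigned)(amount >> 16),
                   (unsigned)(((amount & 0xffff) * 10000) >> 16));
            return 0;
    }
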
 
 static unsigned int
@@ -3401,10 +3870,11 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 {
        struct intel_plane *plane = to_intel_plane(pstate->plane);
        struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
-       uint32_t down_scale_amount, data_rate;
+       uint32_t data_rate;
        uint32_t width = 0, height = 0;
        struct drm_framebuffer *fb;
        u32 format;
+       uint_fixed_16_16_t down_scale_amount;
 
        if (!intel_pstate->base.visible)
                return 0;
@@ -3438,7 +3908,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 
        down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
 
-       return (uint64_t)data_rate * down_scale_amount >> 16;
+       return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
 }
 
 /*
@@ -3587,6 +4057,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
        int num_active;
        unsigned plane_data_rate[I915_MAX_PLANES] = {};
        unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
+       uint16_t total_min_blocks = 0;
 
        /* Clear the partitioning for disabled planes. */
        memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
@@ -3602,10 +4073,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 
        skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
        alloc_size = skl_ddb_entry_size(alloc);
-       if (alloc_size == 0) {
-               memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
+       if (alloc_size == 0)
                return 0;
-       }
 
        skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
 
@@ -3616,10 +4085,18 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
         */
 
        for_each_plane_id_on_crtc(intel_crtc, plane_id) {
-               alloc_size -= minimum[plane_id];
-               alloc_size -= y_minimum[plane_id];
+               total_min_blocks += minimum[plane_id];
+               total_min_blocks += y_minimum[plane_id];
+       }
+
+       if (total_min_blocks > alloc_size) {
+               DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
+               DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks,
+                                                       alloc_size);
+               return -EINVAL;
        }
 
+       alloc_size -= total_min_blocks;
        ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
        ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
 
@@ -3698,7 +4175,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp,
                return FP_16_16_MAX;
 
        wm_intermediate_val = latency * pixel_rate * cpp;
-       ret = fixed_16_16_div_round_up_u64(wm_intermediate_val, 1000 * 512);
+       ret = fixed_16_16_div_u64(wm_intermediate_val, 1000 * 512);
        return ret;
 }
 
@@ -3720,12 +4197,33 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
        return ret;
 }
 
-static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
-                                             struct intel_plane_state *pstate)
+static uint_fixed_16_16_t
+intel_get_linetime_us(struct intel_crtc_state *cstate)
+{
+       uint32_t pixel_rate;
+       uint32_t crtc_htotal;
+       uint_fixed_16_16_t linetime_us;
+
+       if (!cstate->base.active)
+               return u32_to_fixed_16_16(0);
+
+       pixel_rate = cstate->pixel_rate;
+
+       if (WARN_ON(pixel_rate == 0))
+               return u32_to_fixed_16_16(0);
+
+       crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
+       linetime_us = fixed_16_16_div_u64(crtc_htotal * 1000, pixel_rate);
+
+       return linetime_us;
+}
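
As a quick check of the units (hypothetical mode, not from the patch): with crtc_htotal = 2200 and a 148500 kHz pixel rate, the division above gives 2200 * 1000 / 148500, i.e. a line time of about 14.81 us in 16.16 fixed point.
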
+
+static uint32_t
+skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
+                             const struct intel_plane_state *pstate)
 {
        uint64_t adjusted_pixel_rate;
-       uint64_t downscale_amount;
-       uint64_t pixel_rate;
+       uint_fixed_16_16_t downscale_amount;
 
        /* Shouldn't reach here on disabled planes... */
        if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
@@ -3738,15 +4236,13 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
        adjusted_pixel_rate = cstate->pixel_rate;
        downscale_amount = skl_plane_downscale_amount(cstate, pstate);
 
-       pixel_rate = adjusted_pixel_rate * downscale_amount >> 16;
-       WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0));
-
-       return pixel_rate;
+       return mul_round_up_u32_fixed16(adjusted_pixel_rate,
+                                           downscale_amount);
 }
 
 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
                                struct intel_crtc_state *cstate,
-                               struct intel_plane_state *intel_pstate,
+                               const struct intel_plane_state *intel_pstate,
                                uint16_t ddb_allocation,
                                int level,
                                uint16_t *out_blocks, /* out */
@@ -3754,8 +4250,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
                                bool *enabled /* out */)
 {
        struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
-       struct drm_plane_state *pstate = &intel_pstate->base;
-       struct drm_framebuffer *fb = pstate->fb;
+       const struct drm_plane_state *pstate = &intel_pstate->base;
+       const struct drm_framebuffer *fb = pstate->fb;
        uint32_t latency = dev_priv->wm.skl_latency[level];
        uint_fixed_16_16_t method1, method2;
        uint_fixed_16_16_t plane_blocks_per_line;
@@ -3834,8 +4330,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
        if (y_tiled) {
                interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
                                           y_min_scanlines, 512);
-               plane_blocks_per_line =
-                     fixed_16_16_div_round_up(interm_pbpl, y_min_scanlines);
+               plane_blocks_per_line = fixed_16_16_div(interm_pbpl,
+                                                       y_min_scanlines);
        } else if (x_tiled) {
                interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
                plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl);
@@ -3856,19 +4352,25 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
        if (y_tiled) {
                selected_result = max_fixed_16_16(method2, y_tile_minimum);
        } else {
+               uint32_t linetime_us;
+
+               linetime_us = fixed_16_16_to_u32_round_up(
+                               intel_get_linetime_us(cstate));
                if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
                    (plane_bytes_per_line / 512 < 1))
                        selected_result = method2;
-               else if ((ddb_allocation /
+               else if ((ddb_allocation && ddb_allocation /
                        fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1)
                        selected_result = min_fixed_16_16(method1, method2);
+               else if (latency >= linetime_us)
+                       selected_result = min_fixed_16_16(method1, method2);
                else
                        selected_result = method1;
        }
 
        res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1;
-       res_lines = DIV_ROUND_UP(selected_result.val,
-                                plane_blocks_per_line.val);
+       res_lines = div_round_up_fixed16(selected_result,
+                                        plane_blocks_per_line);
 
        if (level >= 1 && level <= 7) {
                if (y_tiled) {
@@ -3907,54 +4409,39 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 }
 
 static int
-skl_compute_wm_level(const struct drm_i915_private *dev_priv,
-                    struct skl_ddb_allocation *ddb,
-                    struct intel_crtc_state *cstate,
-                    struct intel_plane *intel_plane,
-                    int level,
-                    struct skl_wm_level *result)
+skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
+                     struct skl_ddb_allocation *ddb,
+                     struct intel_crtc_state *cstate,
+                     const struct intel_plane_state *intel_pstate,
+                     struct skl_plane_wm *wm)
 {
-       struct drm_atomic_state *state = cstate->base.state;
        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
-       struct drm_plane *plane = &intel_plane->base;
-       struct intel_plane_state *intel_pstate = NULL;
+       struct drm_plane *plane = intel_pstate->base.plane;
+       struct intel_plane *intel_plane = to_intel_plane(plane);
        uint16_t ddb_blocks;
        enum pipe pipe = intel_crtc->pipe;
+       int level, max_level = ilk_wm_max_level(dev_priv);
        int ret;
 
-       if (state)
-               intel_pstate =
-                       intel_atomic_get_existing_plane_state(state,
-                                                             intel_plane);
-
-       /*
-        * Note: If we start supporting multiple pending atomic commits against
-        * the same planes/CRTC's in the future, plane->state will no longer be
-        * the correct pre-state to use for the calculations here and we'll
-        * need to change where we get the 'unchanged' plane data from.
-        *
-        * For now this is fine because we only allow one queued commit against
-        * a CRTC.  Even if the plane isn't modified by this transaction and we
-        * don't have a plane lock, we still have the CRTC's lock, so we know
-        * that no other transactions are racing with us to update it.
-        */
-       if (!intel_pstate)
-               intel_pstate = to_intel_plane_state(plane->state);
-
-       WARN_ON(!intel_pstate->base.fb);
+       if (WARN_ON(!intel_pstate->base.fb))
+               return -EINVAL;
 
        ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
 
-       ret = skl_compute_plane_wm(dev_priv,
-                                  cstate,
-                                  intel_pstate,
-                                  ddb_blocks,
-                                  level,
-                                  &result->plane_res_b,
-                                  &result->plane_res_l,
-                                  &result->plane_en);
-       if (ret)
-               return ret;
+       for (level = 0; level <= max_level; level++) {
+               struct skl_wm_level *result = &wm->wm[level];
+
+               ret = skl_compute_plane_wm(dev_priv,
+                                          cstate,
+                                          intel_pstate,
+                                          ddb_blocks,
+                                          level,
+                                          &result->plane_res_b,
+                                          &result->plane_res_l,
+                                          &result->plane_en);
+               if (ret)
+                       return ret;
+       }
 
        return 0;
 }
@@ -3964,19 +4451,16 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate)
 {
        struct drm_atomic_state *state = cstate->base.state;
        struct drm_i915_private *dev_priv = to_i915(state->dev);
-       uint32_t pixel_rate;
+       uint_fixed_16_16_t linetime_us;
        uint32_t linetime_wm;
 
-       if (!cstate->base.active)
-               return 0;
-
-       pixel_rate = cstate->pixel_rate;
+       linetime_us = intel_get_linetime_us(cstate);
 
-       if (WARN_ON(pixel_rate == 0))
+       if (is_fixed16_zero(linetime_us))
                return 0;
 
-       linetime_wm = DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal *
-                                  1000, pixel_rate);
+       linetime_wm = fixed_16_16_to_u32_round_up(mul_u32_fixed_16_16(8,
+                               linetime_us));
 
        /* Display WA #1135: bxt. */
        if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled)
@@ -4000,10 +4484,11 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
                             struct skl_pipe_wm *pipe_wm)
 {
        struct drm_device *dev = cstate->base.crtc->dev;
+       struct drm_crtc_state *crtc_state = &cstate->base;
        const struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane;
+       struct drm_plane *plane;
+       const struct drm_plane_state *pstate;
        struct skl_plane_wm *wm;
-       int level, max_level = ilk_wm_max_level(dev_priv);
        int ret;
 
        /*
@@ -4012,18 +4497,17 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
         */
        memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
 
-       for_each_intel_plane_mask(&dev_priv->drm,
-                                 intel_plane,
-                                 cstate->base.plane_mask) {
-               wm = &pipe_wm->planes[intel_plane->id];
+       drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
+               const struct intel_plane_state *intel_pstate =
+                                               to_intel_plane_state(pstate);
+               enum plane_id plane_id = to_intel_plane(plane)->id;
+
+               wm = &pipe_wm->planes[plane_id];
 
-               for (level = 0; level <= max_level; level++) {
-                       ret = skl_compute_wm_level(dev_priv, ddb, cstate,
-                                                  intel_plane, level,
-                                                  &wm->wm[level]);
-                       if (ret)
-                               return ret;
-               }
+               ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
+                                           intel_pstate, wm);
+               if (ret)
+                       return ret;
                skl_compute_transition_wm(cstate, &wm->trans_wm);
        }
        pipe_wm->linetime = skl_compute_linetime_wm(cstate);
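The pipe_wm->linetime value assigned above comes from skl_compute_linetime_wm(): eight line-times, where one line-time is crtc_htotal * 1000 / pixel_rate (kHz) microseconds, rounded up. A worked example with assumed 1080p-class timings; the numbers are illustrative and not taken from the patch.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t crtc_htotal = 2200;		/* assumed horizontal total */
	uint32_t pixel_rate_khz = 148500;	/* assumed pixel clock, kHz */

	/* eight line-times in us, rounded up, as in the hunk above */
	uint32_t linetime_wm = (8u * crtc_htotal * 1000u + pixel_rate_khz - 1)
			       / pixel_rate_khz;

	printf("one line ~%.3f us, linetime wm = %u\n",
	       (double)crtc_htotal * 1000.0 / pixel_rate_khz,
	       linetime_wm);			/* ~14.815 us, wm = 119 */
	return 0;
}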
@@ -4654,6 +5138,32 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
 #define _FW_WM_VLV(value, plane) \
        (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
 
+static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
+                              struct g4x_wm_values *wm)
+{
+       uint32_t tmp;
+
+       tmp = I915_READ(DSPFW1);
+       wm->sr.plane = _FW_WM(tmp, SR);
+       wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
+       wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
+       wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
+
+       tmp = I915_READ(DSPFW2);
+       wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
+       wm->sr.fbc = _FW_WM(tmp, FBC_SR);
+       wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
+       wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
+       wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
+       wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
+
+       tmp = I915_READ(DSPFW3);
+       wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
+       wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
+       wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
+       wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
+}
+
 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
                               struct vlv_wm_values *wm)
 {
@@ -4730,6 +5240,147 @@ static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
 #undef _FW_WM
 #undef _FW_WM_VLV
 
+void g4x_wm_get_hw_state(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct g4x_wm_values *wm = &dev_priv->wm.g4x;
+       struct intel_crtc *crtc;
+
+       g4x_read_wm_values(dev_priv, wm);
+
+       wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
+
+       for_each_intel_crtc(dev, crtc) {
+               struct intel_crtc_state *crtc_state =
+                       to_intel_crtc_state(crtc->base.state);
+               struct g4x_wm_state *active = &crtc->wm.active.g4x;
+               struct g4x_pipe_wm *raw;
+               enum pipe pipe = crtc->pipe;
+               enum plane_id plane_id;
+               int level, max_level;
+
+               active->cxsr = wm->cxsr;
+               active->hpll_en = wm->hpll_en;
+               active->fbc_en = wm->fbc_en;
+
+               active->sr = wm->sr;
+               active->hpll = wm->hpll;
+
+               for_each_plane_id_on_crtc(crtc, plane_id) {
+                       active->wm.plane[plane_id] =
+                               wm->pipe[pipe].plane[plane_id];
+               }
+
+               if (wm->cxsr && wm->hpll_en)
+                       max_level = G4X_WM_LEVEL_HPLL;
+               else if (wm->cxsr)
+                       max_level = G4X_WM_LEVEL_SR;
+               else
+                       max_level = G4X_WM_LEVEL_NORMAL;
+
+               level = G4X_WM_LEVEL_NORMAL;
+               raw = &crtc_state->wm.g4x.raw[level];
+               for_each_plane_id_on_crtc(crtc, plane_id)
+                       raw->plane[plane_id] = active->wm.plane[plane_id];
+
+               if (++level > max_level)
+                       goto out;
+
+               raw = &crtc_state->wm.g4x.raw[level];
+               raw->plane[PLANE_PRIMARY] = active->sr.plane;
+               raw->plane[PLANE_CURSOR] = active->sr.cursor;
+               raw->plane[PLANE_SPRITE0] = 0;
+               raw->fbc = active->sr.fbc;
+
+               if (++level > max_level)
+                       goto out;
+
+               raw = &crtc_state->wm.g4x.raw[level];
+               raw->plane[PLANE_PRIMARY] = active->hpll.plane;
+               raw->plane[PLANE_CURSOR] = active->hpll.cursor;
+               raw->plane[PLANE_SPRITE0] = 0;
+               raw->fbc = active->hpll.fbc;
+
+       out:
+               for_each_plane_id_on_crtc(crtc, plane_id)
+                       g4x_raw_plane_wm_set(crtc_state, level,
+                                            plane_id, USHRT_MAX);
+               g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
+
+               crtc_state->wm.g4x.optimal = *active;
+               crtc_state->wm.g4x.intermediate = *active;
+
+               DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
+                             pipe_name(pipe),
+                             wm->pipe[pipe].plane[PLANE_PRIMARY],
+                             wm->pipe[pipe].plane[PLANE_CURSOR],
+                             wm->pipe[pipe].plane[PLANE_SPRITE0]);
+       }
+
+       DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
+                     wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
+       DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, cursor=%d fbc=%d\n",
+                     wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
+       DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
+                     yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
+}
+
+void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
+{
+       struct intel_plane *plane;
+       struct intel_crtc *crtc;
+
+       mutex_lock(&dev_priv->wm.wm_mutex);
+
+       for_each_intel_plane(&dev_priv->drm, plane) {
+               struct intel_crtc *crtc =
+                       intel_get_crtc_for_pipe(dev_priv, plane->pipe);
+               struct intel_crtc_state *crtc_state =
+                       to_intel_crtc_state(crtc->base.state);
+               struct intel_plane_state *plane_state =
+                       to_intel_plane_state(plane->base.state);
+               struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
+               enum plane_id plane_id = plane->id;
+               int level;
+
+               if (plane_state->base.visible)
+                       continue;
+
+               for (level = 0; level < 3; level++) {
+                       struct g4x_pipe_wm *raw =
+                               &crtc_state->wm.g4x.raw[level];
+
+                       raw->plane[plane_id] = 0;
+                       wm_state->wm.plane[plane_id] = 0;
+               }
+
+               if (plane_id == PLANE_PRIMARY) {
+                       for (level = 0; level < 3; level++) {
+                               struct g4x_pipe_wm *raw =
+                                       &crtc_state->wm.g4x.raw[level];
+                               raw->fbc = 0;
+                       }
+
+                       wm_state->sr.fbc = 0;
+                       wm_state->hpll.fbc = 0;
+                       wm_state->fbc_en = false;
+               }
+       }
+
+       for_each_intel_crtc(&dev_priv->drm, crtc) {
+               struct intel_crtc_state *crtc_state =
+                       to_intel_crtc_state(crtc->base.state);
+
+               crtc_state->wm.g4x.intermediate =
+                       crtc_state->wm.g4x.optimal;
+               crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
+       }
+
+       g4x_program_watermarks(dev_priv);
+
+       mutex_unlock(&dev_priv->wm.wm_mutex);
+}
+
 void vlv_wm_get_hw_state(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
@@ -4792,7 +5443,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
                active->cxsr = wm->cxsr;
 
                for (level = 0; level < active->num_levels; level++) {
-                       struct vlv_pipe_wm *raw =
+                       struct g4x_pipe_wm *raw =
                                &crtc_state->wm.vlv.raw[level];
 
                        active->sr[level].plane = wm->sr.plane;
@@ -4852,7 +5503,7 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
                        continue;
 
                for (level = 0; level < wm_state->num_levels; level++) {
-                       struct vlv_pipe_wm *raw =
+                       struct g4x_pipe_wm *raw =
                                &crtc_state->wm.vlv.raw[level];
 
                        raw->plane[plane_id] = 0;
@@ -8036,6 +8687,12 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
                dev_priv->display.initial_watermarks = vlv_initial_watermarks;
                dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
                dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
+       } else if (IS_G4X(dev_priv)) {
+               g4x_setup_wm_latency(dev_priv);
+               dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
+               dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
+               dev_priv->display.initial_watermarks = g4x_initial_watermarks;
+               dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
        } else if (IS_PINEVIEW(dev_priv)) {
                if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
                                            dev_priv->is_ddr3,
@@ -8051,8 +8708,6 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
                        dev_priv->display.update_wm = NULL;
                } else
                        dev_priv->display.update_wm = pineview_update_wm;
-       } else if (IS_G4X(dev_priv)) {
-               dev_priv->display.update_wm = g4x_update_wm;
        } else if (IS_GEN4(dev_priv)) {
                dev_priv->display.update_wm = i965_update_wm;
        } else if (IS_GEN3(dev_priv)) {
@@ -8135,9 +8790,9 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
        I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
        I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
-       if (intel_wait_for_register_fw(dev_priv,
-                                      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-                                      500)) {
+       if (__intel_wait_for_register_fw(dev_priv,
+                                        GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+                                        500, 0, NULL)) {
                DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
                return -ETIMEDOUT;
        }
@@ -8180,9 +8835,9 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
        I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
        I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
-       if (intel_wait_for_register_fw(dev_priv,
-                                      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-                                      500)) {
+       if (__intel_wait_for_register_fw(dev_priv,
+                                        GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+                                        500, 0, NULL)) {
                DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
                return -ETIMEDOUT;
        }
index 66a2b8b83972691d04f2737337e7ea6cf6a72851..acd1da9b62a3f2d5ed2f4620c130c33c684d6042 100644 (file)
  */
 #define LEGACY_REQUEST_SIZE 200
 
-static int __intel_ring_space(int head, int tail, int size)
+static unsigned int __intel_ring_space(unsigned int head,
+                                      unsigned int tail,
+                                      unsigned int size)
 {
-       int space = head - tail;
-       if (space <= 0)
-               space += size;
-       return space - I915_RING_FREE_SPACE;
+       /*
+        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+        * same cacheline, the Head Pointer must not be greater than the Tail
+        * Pointer."
+        */
+       GEM_BUG_ON(!is_power_of_2(size));
+       return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
-void intel_ring_update_space(struct intel_ring *ring)
+unsigned int intel_ring_update_space(struct intel_ring *ring)
 {
-       ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+       unsigned int space;
+
+       space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+       ring->space = space;
+       return space;
 }
 
 static int
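The rewritten __intel_ring_space() above relies on the ring size being a power of two: unsigned subtraction plus the (size - 1) mask handles tail wraparound, and subtracting CACHELINE_BYTES keeps one cacheline of slack so the tail is never programmed into the same cacheline as the head, per the BSpec quote in the hunk. A standalone sketch of the same computation, using hypothetical head/tail values rather than driver state:

#include <stdio.h>

#define CACHELINE_BYTES 64u

static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	/* size must be a power of two for the mask to model the wrap */
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

int main(void)
{
	unsigned int size = 4096;

	printf("%u\n", ring_space(0, 0, size));		/* 4032: empty ring */
	printf("%u\n", ring_space(256, 192, size));	/* 0: only the slack left */
	printf("%u\n", ring_space(64, 512, size));	/* 3584: tail has wrapped */
	return 0;
}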
@@ -538,9 +548,9 @@ static int init_ring_common(struct intel_engine_cs *engine)
        I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID);
 
        /* If the head is still not zero, the ring is dead */
-       if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
-                                      RING_VALID, RING_VALID,
-                                      50)) {
+       if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base),
+                                   RING_VALID, RING_VALID,
+                                   50)) {
                DRM_ERROR("%s initialization failed "
                          "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
                          engine->name,
@@ -774,8 +784,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
        i915_gem_request_submit(request);
 
-       assert_ring_tail_valid(request->ring, request->tail);
-       I915_WRITE_TAIL(request->engine, request->tail);
+       I915_WRITE_TAIL(request->engine,
+                       intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1259,6 +1269,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
 
+       GEM_BUG_ON(engine->id != RCS);
+
        dev_priv->status_page_dmah =
                drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
        if (!dev_priv->status_page_dmah)
@@ -1270,17 +1282,18 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
        return 0;
 }
 
-int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias)
+int intel_ring_pin(struct intel_ring *ring,
+                  struct drm_i915_private *i915,
+                  unsigned int offset_bias)
 {
-       unsigned int flags;
-       enum i915_map_type map;
+       enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
        struct i915_vma *vma = ring->vma;
+       unsigned int flags;
        void *addr;
        int ret;
 
        GEM_BUG_ON(ring->vaddr);
 
-       map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;
 
        flags = PIN_GLOBAL;
        if (offset_bias)
@@ -1316,11 +1329,23 @@ err:
        return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+       GEM_BUG_ON(!list_empty(&ring->request_list));
+       ring->tail = tail;
+       ring->head = tail;
+       ring->emit = tail;
+       intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
        GEM_BUG_ON(!ring->vma);
        GEM_BUG_ON(!ring->vaddr);
 
+       /* Discard any unused bytes beyond what was submitted to hw. */
+       intel_ring_reset(ring, ring->tail);
+
        if (i915_vma_is_map_and_fenceable(ring->vma))
                i915_vma_unpin_iomap(ring->vma);
        else
@@ -1338,7 +1363,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 
        obj = i915_gem_object_create_stolen(dev_priv, size);
        if (!obj)
-               obj = i915_gem_object_create(dev_priv, size);
+               obj = i915_gem_object_create_internal(dev_priv, size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);
 
@@ -1369,8 +1394,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
        if (!ring)
                return ERR_PTR(-ENOMEM);
 
-       ring->engine = engine;
-
        INIT_LIST_HEAD(&ring->request_list);
 
        ring->size = size;
@@ -1424,22 +1447,73 @@ static int context_pin(struct i915_gem_context *ctx)
                            PIN_GLOBAL | PIN_HIGH);
 }
 
-static int intel_ring_context_pin(struct intel_engine_cs *engine,
-                                 struct i915_gem_context *ctx)
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *i915 = engine->i915;
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+
+       obj = i915_gem_object_create(i915, engine->context_size);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       /*
+        * Try to make the context utilize L3 as well as LLC.
+        *
+        * On VLV we don't have L3 controls in the PTEs so we
+        * shouldn't touch the cache level, especially as that
+        * would make the object snooped which might have a
+        * negative performance impact.
+        *
+        * Snooping is required on non-llc platforms in execlist
+        * mode, but since all GGTT accesses use PAT entry 0 we
+        * get snooping anyway regardless of cache_level.
+        *
+        * This is only applicable for Ivy Bridge devices since
+        * later platforms don't have L3 control bits in the PTE.
+        */
+       if (IS_IVYBRIDGE(i915)) {
+               /* Ignore any error, regard it as a simple optimisation */
+               i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
+       }
+
+       vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
+       if (IS_ERR(vma))
+               i915_gem_object_put(obj);
+
+       return vma;
+}
+
+static struct intel_ring *
+intel_ring_context_pin(struct intel_engine_cs *engine,
+                      struct i915_gem_context *ctx)
 {
        struct intel_context *ce = &ctx->engine[engine->id];
        int ret;
 
        lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 
-       if (ce->pin_count++)
-               return 0;
+       if (likely(ce->pin_count++))
+               goto out;
        GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
+       if (!ce->state && engine->context_size) {
+               struct i915_vma *vma;
+
+               vma = alloc_context_vma(engine);
+               if (IS_ERR(vma)) {
+                       ret = PTR_ERR(vma);
+                       goto err;
+               }
+
+               ce->state = vma;
+       }
+
        if (ce->state) {
                ret = context_pin(ctx);
                if (ret)
-                       goto error;
+                       goto err;
 
                ce->state->obj->mm.dirty = true;
        }
@@ -1455,11 +1529,14 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
                ce->initialised = true;
 
        i915_gem_context_get(ctx);
-       return 0;
 
-error:
+out:
+       /* One ringbuffer to rule them all */
+       return engine->buffer;
+
+err:
        ce->pin_count = 0;
-       return ret;
+       return ERR_PTR(ret);
 }
 
 static void intel_ring_context_unpin(struct intel_engine_cs *engine,
@@ -1481,78 +1558,70 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine,
 
 static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
-       struct drm_i915_private *dev_priv = engine->i915;
        struct intel_ring *ring;
-       int ret;
-
-       WARN_ON(engine->buffer);
+       int err;
 
        intel_engine_setup_common(engine);
 
-       ret = intel_engine_init_common(engine);
-       if (ret)
-               goto error;
+       err = intel_engine_init_common(engine);
+       if (err)
+               goto err;
+
+       if (HWS_NEEDS_PHYSICAL(engine->i915))
+               err = init_phys_status_page(engine);
+       else
+               err = init_status_page(engine);
+       if (err)
+               goto err;
 
        ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
        if (IS_ERR(ring)) {
-               ret = PTR_ERR(ring);
-               goto error;
-       }
-
-       if (HWS_NEEDS_PHYSICAL(dev_priv)) {
-               WARN_ON(engine->id != RCS);
-               ret = init_phys_status_page(engine);
-               if (ret)
-                       goto error;
-       } else {
-               ret = init_status_page(engine);
-               if (ret)
-                       goto error;
+               err = PTR_ERR(ring);
+               goto err_hws;
        }
 
        /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-       ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE);
-       if (ret) {
-               intel_ring_free(ring);
-               goto error;
-       }
+       err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE);
+       if (err)
+               goto err_ring;
+
+       GEM_BUG_ON(engine->buffer);
        engine->buffer = ring;
 
        return 0;
 
-error:
-       intel_engine_cleanup(engine);
-       return ret;
+err_ring:
+       intel_ring_free(ring);
+err_hws:
+       if (HWS_NEEDS_PHYSICAL(engine->i915))
+               cleanup_phys_status_page(engine);
+       else
+               cleanup_status_page(engine);
+err:
+       intel_engine_cleanup_common(engine);
+       return err;
 }
 
 void intel_engine_cleanup(struct intel_engine_cs *engine)
 {
-       struct drm_i915_private *dev_priv;
-
-       dev_priv = engine->i915;
+       struct drm_i915_private *dev_priv = engine->i915;
 
-       if (engine->buffer) {
-               WARN_ON(INTEL_GEN(dev_priv) > 2 &&
-                       (I915_READ_MODE(engine) & MODE_IDLE) == 0);
+       WARN_ON(INTEL_GEN(dev_priv) > 2 &&
+               (I915_READ_MODE(engine) & MODE_IDLE) == 0);
 
-               intel_ring_unpin(engine->buffer);
-               intel_ring_free(engine->buffer);
-               engine->buffer = NULL;
-       }
+       intel_ring_unpin(engine->buffer);
+       intel_ring_free(engine->buffer);
 
        if (engine->cleanup)
                engine->cleanup(engine);
 
-       if (HWS_NEEDS_PHYSICAL(dev_priv)) {
-               WARN_ON(engine->id != RCS);
+       if (HWS_NEEDS_PHYSICAL(dev_priv))
                cleanup_phys_status_page(engine);
-       } else {
+       else
                cleanup_status_page(engine);
-       }
 
        intel_engine_cleanup_common(engine);
 
-       engine->i915 = NULL;
        dev_priv->engine[engine->id] = NULL;
        kfree(engine);
 }
@@ -1562,8 +1631,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
+       /* Restart from the beginning of the rings for convenience */
        for_each_engine(engine, dev_priv, id)
-               engine->buffer->head = engine->buffer->tail;
+               intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1578,9 +1648,6 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
         */
        request->reserved_space += LEGACY_REQUEST_SIZE;
 
-       GEM_BUG_ON(!request->engine->buffer);
-       request->ring = request->engine->buffer;
-
        cs = intel_ring_begin(request, 0);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
@@ -1589,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
        return 0;
 }
 
-static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
+static noinline int wait_for_space(struct drm_i915_gem_request *req,
+                                  unsigned int bytes)
 {
        struct intel_ring *ring = req->ring;
        struct drm_i915_gem_request *target;
@@ -1597,8 +1665,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 
        lockdep_assert_held(&req->i915->drm.struct_mutex);
 
-       intel_ring_update_space(ring);
-       if (ring->space >= bytes)
+       if (intel_ring_update_space(ring) >= bytes)
                return 0;
 
        /*
@@ -1613,12 +1680,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
        GEM_BUG_ON(!req->reserved_space);
 
        list_for_each_entry(target, &ring->request_list, ring_link) {
-               unsigned space;
-
                /* Would completion of this request free enough space? */
-               space = __intel_ring_space(target->postfix, ring->tail,
-                                          ring->size);
-               if (space >= bytes)
+               if (bytes <= __intel_ring_space(target->postfix,
+                                               ring->emit, ring->size))
                        break;
        }
 
@@ -1638,59 +1702,64 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
        return 0;
 }
 
-u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
+u32 *intel_ring_begin(struct drm_i915_gem_request *req,
+                     unsigned int num_dwords)
 {
        struct intel_ring *ring = req->ring;
-       int remain_actual = ring->size - ring->tail;
-       int remain_usable = ring->effective_size - ring->tail;
-       int bytes = num_dwords * sizeof(u32);
-       int total_bytes, wait_bytes;
-       bool need_wrap = false;
+       const unsigned int remain_usable = ring->effective_size - ring->emit;
+       const unsigned int bytes = num_dwords * sizeof(u32);
+       unsigned int need_wrap = 0;
+       unsigned int total_bytes;
        u32 *cs;
 
        total_bytes = bytes + req->reserved_space;
+       GEM_BUG_ON(total_bytes > ring->effective_size);
 
-       if (unlikely(bytes > remain_usable)) {
-               /*
-                * Not enough space for the basic request. So need to flush
-                * out the remainder and then wait for base + reserved.
-                */
-               wait_bytes = remain_actual + total_bytes;
-               need_wrap = true;
-       } else if (unlikely(total_bytes > remain_usable)) {
-               /*
-                * The base request will fit but the reserved space
-                * falls off the end. So we don't need an immediate wrap
-                * and only need to effectively wait for the reserved
-                * size space from the start of ringbuffer.
-                */
-               wait_bytes = remain_actual + req->reserved_space;
-       } else {
-               /* No wrapping required, just waiting. */
-               wait_bytes = total_bytes;
+       if (unlikely(total_bytes > remain_usable)) {
+               const int remain_actual = ring->size - ring->emit;
+
+               if (bytes > remain_usable) {
+                       /*
+                        * Not enough space for the basic request, so we need
+                        * to flush out the remainder and then wait for
+                        * base + reserved.
+                        */
+                       total_bytes += remain_actual;
+                       need_wrap = remain_actual | 1;
+               } else {
+                       /*
+                        * The base request will fit, but the reserved space
+                        * falls off the end, so we don't need an immediate
+                        * wrap and only need to effectively wait for the
+                        * reserved size from the start of the ring buffer.
+                        */
+                       total_bytes = req->reserved_space + remain_actual;
+               }
        }
 
-       if (wait_bytes > ring->space) {
-               int ret = wait_for_space(req, wait_bytes);
+       if (unlikely(total_bytes > ring->space)) {
+               int ret = wait_for_space(req, total_bytes);
                if (unlikely(ret))
                        return ERR_PTR(ret);
        }
 
        if (unlikely(need_wrap)) {
-               GEM_BUG_ON(remain_actual > ring->space);
-               GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+               need_wrap &= ~1;
+               GEM_BUG_ON(need_wrap > ring->space);
+               GEM_BUG_ON(ring->emit + need_wrap > ring->size);
 
                /* Fill the tail with MI_NOOP */
-               memset(ring->vaddr + ring->tail, 0, remain_actual);
-               ring->tail = 0;
-               ring->space -= remain_actual;
+               memset(ring->vaddr + ring->emit, 0, need_wrap);
+               ring->emit = 0;
+               ring->space -= need_wrap;
        }
 
-       GEM_BUG_ON(ring->tail > ring->size - bytes);
-       cs = ring->vaddr + ring->tail;
-       ring->tail += bytes;
+       GEM_BUG_ON(ring->emit > ring->size - bytes);
+       GEM_BUG_ON(ring->space < bytes);
+       cs = ring->vaddr + ring->emit;
+       GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes));
+       ring->emit += bytes;
        ring->space -= bytes;
-       GEM_BUG_ON(ring->space < 0);
 
        return cs;
 }
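The need_wrap handling above packs two facts into one variable: remain_actual (the bytes left before the wrap point) is always a multiple of the cacheline size and hence even, so its low bit is free to carry the "wrap needed" flag set by need_wrap = remain_actual | 1 and cleared with &= ~1 before the MI_NOOP fill. A small standalone illustration of the trick, with a made-up byte count:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int remain_actual = 192;	/* hypothetical, always even */
	unsigned int need_wrap;

	assert((remain_actual & 1) == 0);
	need_wrap = remain_actual | 1;		/* pack amount + flag */

	if (need_wrap) {
		unsigned int fill = need_wrap & ~1u;	/* unpack the amount */
		printf("fill %u bytes with MI_NOOP, then restart at 0\n", fill);
	}
	return 0;
}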
@@ -1699,7 +1768,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
        int num_dwords =
-               (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+               (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
        u32 *cs;
 
        if (num_dwords == 0)
@@ -1736,11 +1805,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
        I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
 
        /* Wait for the ring not to be idle, i.e. for it to wake up. */
-       if (intel_wait_for_register_fw(dev_priv,
-                                      GEN6_BSD_SLEEP_PSMI_CONTROL,
-                                      GEN6_BSD_SLEEP_INDICATOR,
-                                      0,
-                                      50))
+       if (__intel_wait_for_register_fw(dev_priv,
+                                        GEN6_BSD_SLEEP_PSMI_CONTROL,
+                                        GEN6_BSD_SLEEP_INDICATOR,
+                                        0,
+                                        1000, 0, NULL))
                DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
 
        /* Now that the ring is fully powered up, update the tail */
@@ -2182,20 +2251,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
        return intel_init_ring_buffer(engine);
 }
 
-/**
- * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
- */
-int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-
-       intel_ring_default_vfuncs(dev_priv, engine);
-
-       engine->emit_flush = gen6_bsd_ring_flush;
-
-       return intel_init_ring_buffer(engine);
-}
-
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
index a82a0807f64dbd0624728fe3c65215abe3647565..6aa20ac8cde388613248f06a627213215a794948 100644 (file)
 #define CACHELINE_BYTES 64
 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
 
-/*
- * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
- * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
- * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
- *
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
- * cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- */
-#define I915_RING_FREE_SPACE 64
-
 struct intel_hw_status_page {
        struct i915_vma *vma;
        u32 *page_addr;
@@ -139,16 +128,15 @@ struct intel_ring {
        struct i915_vma *vma;
        void *vaddr;
 
-       struct intel_engine_cs *engine;
-
        struct list_head request_list;
 
        u32 head;
        u32 tail;
+       u32 emit;
 
-       int space;
-       int size;
-       int effective_size;
+       u32 space;
+       u32 size;
+       u32 effective_size;
 };
 
 struct i915_gem_context;
@@ -189,15 +177,28 @@ enum intel_engine_id {
        VECS
 };
 
+struct i915_priolist {
+       struct rb_node node;
+       struct list_head requests;
+       int priority;
+};
+
+#define INTEL_ENGINE_CS_MAX_NAME 8
+
 struct intel_engine_cs {
        struct drm_i915_private *i915;
-       const char      *name;
+       char name[INTEL_ENGINE_CS_MAX_NAME];
        enum intel_engine_id id;
-       unsigned int exec_id;
+       unsigned int uabi_id;
        unsigned int hw_id;
        unsigned int guc_id;
-       u32             mmio_base;
+
+       u8 class;
+       u8 instance;
+       u32 context_size;
+       u32 mmio_base;
        unsigned int irq_shift;
+
        struct intel_ring *buffer;
        struct intel_timeline *timeline;
 
@@ -265,8 +266,8 @@ struct intel_engine_cs {
 
        void            (*set_default_submission)(struct intel_engine_cs *engine);
 
-       int             (*context_pin)(struct intel_engine_cs *engine,
-                                      struct i915_gem_context *ctx);
+       struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
+                                         struct i915_gem_context *ctx);
        void            (*context_unpin)(struct intel_engine_cs *engine,
                                         struct i915_gem_context *ctx);
        int             (*request_alloc)(struct drm_i915_gem_request *req);
@@ -372,9 +373,18 @@ struct intel_engine_cs {
 
        /* Execlists */
        struct tasklet_struct irq_tasklet;
+       struct i915_priolist default_priolist;
+       bool no_priolist;
        struct execlist_port {
-               struct drm_i915_gem_request *request;
-               unsigned int count;
+               struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, e) ((p) - (e)->execlist_port)
                GEM_DEBUG_DECL(u32 context_id);
        } execlist_port[2];
        struct rb_root execlist_queue;
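The execlist_port macros above store a small submission count in the low bits of the request pointer: EXECLIST_COUNT_BITS reserves two bits, which is safe because the pointer is sufficiently aligned. ptr_pack_bits()/ptr_unpack_bits() are the i915 helpers; the standalone pack()/unpack() below are simplified stand-ins written only to show the idea.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define COUNT_BITS 2
#define COUNT_MASK ((1ul << COUNT_BITS) - 1)

static void *pack(void *ptr, unsigned long count)
{
	assert(((uintptr_t)ptr & COUNT_MASK) == 0);	/* needs alignment */
	assert(count <= COUNT_MASK);
	return (void *)((uintptr_t)ptr | count);
}

static void *unpack(void *packed, unsigned long *count)
{
	*count = (uintptr_t)packed & COUNT_MASK;
	return (void *)((uintptr_t)packed & ~COUNT_MASK);
}

int main(void)
{
	static int request;			/* stand-in for a request */
	unsigned long count;
	void *port;

	port = pack(&request, 1);			/* first submission */
	port = pack(unpack(port, &count), count + 1);	/* resubmitted once */
	unpack(port, &count);
	printf("count = %lu\n", count);			/* prints 2 */
	return 0;
}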
@@ -487,7 +497,11 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+int intel_ring_pin(struct intel_ring *ring,
+                  struct drm_i915_private *i915,
+                  unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+unsigned int intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -498,7 +512,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
 
-u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n);
+u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
+                                  unsigned int n);
 
 static inline void
 intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
@@ -511,7 +526,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
         * reserved for the command packet (i.e. the value passed to
         * intel_ring_begin()).
         */
-       GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+       GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -538,9 +553,40 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
         */
        GEM_BUG_ON(!IS_ALIGNED(tail, 8));
        GEM_BUG_ON(tail >= ring->size);
+
+       /*
+        * "Ring Buffer Use"
+        *      Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+        *      Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+        *      Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+        * same cacheline, the Head Pointer must not be greater than the Tail
+        * Pointer."
+        *
+        * We use ring->head as the last known location of the actual RING_HEAD;
+        * it may have advanced, but in the worst case it is equal to
+        * ring->head, so we should never program RING_TAIL to advance into
+        * the same cacheline as ring->head.
+        */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+       GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+                  tail < ring->head);
+#undef cacheline
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+       /* Whilst writes to the tail are strictly ordered, there is no
+        * serialisation between readers and the writers. The tail may be
+        * read by i915_gem_request_retire() just as it is being updated
+        * by execlists, as although the breadcrumb is complete, the context
+        * switch hasn't been seen.
+        */
+       assert_ring_tail_valid(ring, tail);
+       ring->tail = tail;
+       return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
@@ -551,7 +597,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 
@@ -652,7 +697,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
                           struct intel_wait *wait);
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
                              struct intel_wait *wait);
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
+                                  bool wakeup);
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
@@ -685,6 +731,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
+void intel_engines_mark_idle(struct drm_i915_private *i915);
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
index ef6fa87b2f8aa85c96b55b33de90afbae06e2a6b..6cc181203135e09f4f90eb0f8bef8a169ea6fd79 100644 (file)
@@ -2875,11 +2875,10 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo,
 
        BUILD_BUG_ON(sizeof(enhancements) != 2);
 
-       enhancements.response = 0;
-       intel_sdvo_get_value(intel_sdvo,
-                            SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
-                            &enhancements, sizeof(enhancements));
-       if (enhancements.response == 0) {
+       if (!intel_sdvo_get_value(intel_sdvo,
+                                 SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
+                                 &enhancements, sizeof(enhancements)) ||
+           enhancements.response == 0) {
                DRM_DEBUG_KMS("No enhancement is supported\n");
                return true;
        }
index 191e14ddde0c621bc502737a29201106e7fe4063..c4bf19364e490c9358780b7d8872d681773aed69 100644 (file)
@@ -210,16 +210,14 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work
 }
 
 static void
-skl_update_plane(struct drm_plane *drm_plane,
+skl_update_plane(struct intel_plane *plane,
                 const struct intel_crtc_state *crtc_state,
                 const struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = drm_plane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(drm_plane);
-       struct drm_framebuffer *fb = plane_state->base.fb;
-       enum plane_id plane_id = intel_plane->id;
-       enum pipe pipe = intel_plane->pipe;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       enum plane_id plane_id = plane->id;
+       enum pipe pipe = plane->pipe;
        u32 plane_ctl = plane_state->ctl;
        const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
        u32 surf_addr = plane_state->main.offset;
@@ -288,13 +286,11 @@ skl_update_plane(struct drm_plane *drm_plane,
 }
 
 static void
-skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
+skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 {
-       struct drm_device *dev = dplane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(dplane);
-       enum plane_id plane_id = intel_plane->id;
-       enum pipe pipe = intel_plane->pipe;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       enum plane_id plane_id = plane->id;
+       enum pipe pipe = plane->pipe;
        unsigned long irqflags;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -308,10 +304,10 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
 }
 
 static void
-chv_update_csc(struct intel_plane *intel_plane, uint32_t format)
+chv_update_csc(struct intel_plane *plane, uint32_t format)
 {
-       struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev);
-       enum plane_id plane_id = intel_plane->id;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       enum plane_id plane_id = plane->id;
 
        /* Seems RGB data bypasses the CSC always */
        if (!format_is_yuv(format))
@@ -411,16 +407,14 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
 }
 
 static void
-vlv_update_plane(struct drm_plane *dplane,
+vlv_update_plane(struct intel_plane *plane,
                 const struct intel_crtc_state *crtc_state,
                 const struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = dplane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(dplane);
-       struct drm_framebuffer *fb = plane_state->base.fb;
-       enum pipe pipe = intel_plane->pipe;
-       enum plane_id plane_id = intel_plane->id;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       enum pipe pipe = plane->pipe;
+       enum plane_id plane_id = plane->id;
        u32 sprctl = plane_state->ctl;
        u32 sprsurf_offset = plane_state->main.offset;
        u32 linear_offset;
@@ -442,7 +436,7 @@ vlv_update_plane(struct drm_plane *dplane,
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
        if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B)
-               chv_update_csc(intel_plane, fb->format->format);
+               chv_update_csc(plane, fb->format->format);
 
        if (key->flags) {
                I915_WRITE_FW(SPKEYMINVAL(pipe, plane_id), key->min_value);
@@ -469,13 +463,11 @@ vlv_update_plane(struct drm_plane *dplane,
 }
 
 static void
-vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
+vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 {
-       struct drm_device *dev = dplane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(dplane);
-       enum pipe pipe = intel_plane->pipe;
-       enum plane_id plane_id = intel_plane->id;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       enum pipe pipe = plane->pipe;
+       enum plane_id plane_id = plane->id;
        unsigned long irqflags;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -545,15 +537,13 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
 }
 
 static void
-ivb_update_plane(struct drm_plane *plane,
+ivb_update_plane(struct intel_plane *plane,
                 const struct intel_crtc_state *crtc_state,
                 const struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = plane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(plane);
-       struct drm_framebuffer *fb = plane_state->base.fb;
-       enum pipe pipe = intel_plane->pipe;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       enum pipe pipe = plane->pipe;
        u32 sprctl = plane_state->ctl, sprscale = 0;
        u32 sprsurf_offset = plane_state->main.offset;
        u32 linear_offset;
@@ -600,7 +590,7 @@ ivb_update_plane(struct drm_plane *plane,
                I915_WRITE_FW(SPRLINOFF(pipe), linear_offset);
 
        I915_WRITE_FW(SPRSIZE(pipe), (crtc_h << 16) | crtc_w);
-       if (intel_plane->can_scale)
+       if (plane->can_scale)
                I915_WRITE_FW(SPRSCALE(pipe), sprscale);
        I915_WRITE_FW(SPRCTL(pipe), sprctl);
        I915_WRITE_FW(SPRSURF(pipe),
@@ -611,19 +601,17 @@ ivb_update_plane(struct drm_plane *plane,
 }
 
 static void
-ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
+ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 {
-       struct drm_device *dev = plane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(plane);
-       int pipe = intel_plane->pipe;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       enum pipe pipe = plane->pipe;
        unsigned long irqflags;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
        I915_WRITE_FW(SPRCTL(pipe), 0);
        /* Can't leave the scaler enabled... */
-       if (intel_plane->can_scale)
+       if (plane->can_scale)
                I915_WRITE_FW(SPRSCALE(pipe), 0);
 
        I915_WRITE_FW(SPRSURF(pipe), 0);
@@ -632,7 +620,7 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
        spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state,
+static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
                          const struct intel_plane_state *plane_state)
 {
        struct drm_i915_private *dev_priv =
@@ -686,15 +674,13 @@ static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state,
 }
 
 static void
-ilk_update_plane(struct drm_plane *plane,
+g4x_update_plane(struct intel_plane *plane,
                 const struct intel_crtc_state *crtc_state,
                 const struct intel_plane_state *plane_state)
 {
-       struct drm_device *dev = plane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(plane);
-       struct drm_framebuffer *fb = plane_state->base.fb;
-       int pipe = intel_plane->pipe;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       const struct drm_framebuffer *fb = plane_state->base.fb;
+       enum pipe pipe = plane->pipe;
        u32 dvscntr = plane_state->ctl, dvsscale = 0;
        u32 dvssurf_offset = plane_state->main.offset;
        u32 linear_offset;
@@ -747,12 +733,10 @@ ilk_update_plane(struct drm_plane *plane,
 }
 
 static void
-ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
+g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 {
-       struct drm_device *dev = plane->dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct intel_plane *intel_plane = to_intel_plane(plane);
-       int pipe = intel_plane->pipe;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       enum pipe pipe = plane->pipe;
        unsigned long irqflags;
 
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -768,14 +752,12 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
 }
 
 static int
-intel_check_sprite_plane(struct drm_plane *plane,
+intel_check_sprite_plane(struct intel_plane *plane,
                         struct intel_crtc_state *crtc_state,
                         struct intel_plane_state *state)
 {
-       struct drm_i915_private *dev_priv = to_i915(plane->dev);
-       struct drm_crtc *crtc = state->base.crtc;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct intel_plane *intel_plane = to_intel_plane(plane);
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
        struct drm_framebuffer *fb = state->base.fb;
        int crtc_x, crtc_y;
        unsigned int crtc_w, crtc_h;
@@ -797,7 +779,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
        }
 
        /* Don't modify another pipe's plane */
-       if (intel_plane->pipe != intel_crtc->pipe) {
+       if (plane->pipe != crtc->pipe) {
                DRM_DEBUG_KMS("Wrong plane <-> crtc mapping\n");
                return -EINVAL;
        }
@@ -814,16 +796,16 @@ intel_check_sprite_plane(struct drm_plane *plane,
                if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
                        can_scale = 1;
                        min_scale = 1;
-                       max_scale = skl_max_scale(intel_crtc, crtc_state);
+                       max_scale = skl_max_scale(crtc, crtc_state);
                } else {
                        can_scale = 0;
                        min_scale = DRM_PLANE_HELPER_NO_SCALING;
                        max_scale = DRM_PLANE_HELPER_NO_SCALING;
                }
        } else {
-               can_scale = intel_plane->can_scale;
-               max_scale = intel_plane->max_downscale << 16;
-               min_scale = intel_plane->can_scale ? 1 : (1 << 16);
+               can_scale = plane->can_scale;
+               max_scale = plane->max_downscale << 16;
+               min_scale = plane->can_scale ? 1 : (1 << 16);
        }
 
        /*
@@ -967,7 +949,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
                if (ret)
                        return ret;
 
-               state->ctl = ilk_sprite_ctl(crtc_state, state);
+               state->ctl = g4x_sprite_ctl(crtc_state, state);
        }
 
        return 0;
@@ -1027,7 +1009,7 @@ out:
        return ret;
 }
 
-static const uint32_t ilk_plane_formats[] = {
+static const uint32_t g4x_plane_formats[] = {
        DRM_FORMAT_XRGB8888,
        DRM_FORMAT_YUYV,
        DRM_FORMAT_YVYU,
@@ -1131,15 +1113,15 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
                intel_plane->can_scale = true;
                intel_plane->max_downscale = 16;
 
-               intel_plane->update_plane = ilk_update_plane;
-               intel_plane->disable_plane = ilk_disable_plane;
+               intel_plane->update_plane = g4x_update_plane;
+               intel_plane->disable_plane = g4x_disable_plane;
 
                if (IS_GEN6(dev_priv)) {
                        plane_formats = snb_plane_formats;
                        num_plane_formats = ARRAY_SIZE(snb_plane_formats);
                } else {
-                       plane_formats = ilk_plane_formats;
-                       num_plane_formats = ARRAY_SIZE(ilk_plane_formats);
+                       plane_formats = g4x_plane_formats;
+                       num_plane_formats = ARRAY_SIZE(g4x_plane_formats);
                }
        }
 
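
The sprite check path above derives its pre-SKL scaling limits purely from the per-plane can_scale/max_downscale fields, in 16.16 fixed point. A minimal sketch of that computation, pulled out into a standalone helper for clarity (the helper name is illustrative and not part of this series):

static void sketch_sprite_scale_limits(const struct intel_plane *plane,
                                       int *min_scale, int *max_scale)
{
        /* 16.16 fixed point: 1 permits arbitrary upscaling, 1 << 16 is 1:1 */
        *max_scale = plane->max_downscale << 16;
        *min_scale = plane->can_scale ? 1 : (1 << 16);
}
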
index e077c2a9e69470633fc54e7ebc3089e16d8e36fd..784df024e23056ce08c7ec2ca53f23d8e143886f 100644 (file)
@@ -48,41 +48,6 @@ struct intel_tv {
        struct intel_encoder base;
 
        int type;
-       const char *tv_format;
-       int margin[4];
-       u32 save_TV_H_CTL_1;
-       u32 save_TV_H_CTL_2;
-       u32 save_TV_H_CTL_3;
-       u32 save_TV_V_CTL_1;
-       u32 save_TV_V_CTL_2;
-       u32 save_TV_V_CTL_3;
-       u32 save_TV_V_CTL_4;
-       u32 save_TV_V_CTL_5;
-       u32 save_TV_V_CTL_6;
-       u32 save_TV_V_CTL_7;
-       u32 save_TV_SC_CTL_1, save_TV_SC_CTL_2, save_TV_SC_CTL_3;
-
-       u32 save_TV_CSC_Y;
-       u32 save_TV_CSC_Y2;
-       u32 save_TV_CSC_U;
-       u32 save_TV_CSC_U2;
-       u32 save_TV_CSC_V;
-       u32 save_TV_CSC_V2;
-       u32 save_TV_CLR_KNOBS;
-       u32 save_TV_CLR_LEVEL;
-       u32 save_TV_WIN_POS;
-       u32 save_TV_WIN_SIZE;
-       u32 save_TV_FILTER_CTL_1;
-       u32 save_TV_FILTER_CTL_2;
-       u32 save_TV_FILTER_CTL_3;
-
-       u32 save_TV_H_LUMA[60];
-       u32 save_TV_H_CHROMA[60];
-       u32 save_TV_V_LUMA[43];
-       u32 save_TV_V_CHROMA[43];
-
-       u32 save_TV_DAC;
-       u32 save_TV_CTL;
 };
 
 struct video_levels {
@@ -873,32 +838,18 @@ intel_disable_tv(struct intel_encoder *encoder,
        I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE);
 }
 
-static const struct tv_mode *
-intel_tv_mode_lookup(const char *tv_format)
+static const struct tv_mode *intel_tv_mode_find(struct drm_connector_state *conn_state)
 {
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
-               const struct tv_mode *tv_mode = &tv_modes[i];
+       int format = conn_state->tv.mode;
 
-               if (!strcmp(tv_format, tv_mode->name))
-                       return tv_mode;
-       }
-       return NULL;
-}
-
-static const struct tv_mode *
-intel_tv_mode_find(struct intel_tv *intel_tv)
-{
-       return intel_tv_mode_lookup(intel_tv->tv_format);
+       return &tv_modes[format];
 }
 
 static enum drm_mode_status
 intel_tv_mode_valid(struct drm_connector *connector,
                    struct drm_display_mode *mode)
 {
-       struct intel_tv *intel_tv = intel_attached_tv(connector);
-       const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+       const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
        int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
        if (mode->clock > max_dotclk)
@@ -925,8 +876,7 @@ intel_tv_compute_config(struct intel_encoder *encoder,
                        struct intel_crtc_state *pipe_config,
                        struct drm_connector_state *conn_state)
 {
-       struct intel_tv *intel_tv = enc_to_tv(encoder);
-       const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+       const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
 
        if (!tv_mode)
                return false;
@@ -1032,7 +982,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
        struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
        struct intel_tv *intel_tv = enc_to_tv(encoder);
-       const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+       const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
        u32 tv_ctl;
        u32 scctl1, scctl2, scctl3;
        int i, j;
@@ -1135,12 +1085,12 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder,
        else
                ysize = 2*tv_mode->nbr_end + 1;
 
-       xpos += intel_tv->margin[TV_MARGIN_LEFT];
-       ypos += intel_tv->margin[TV_MARGIN_TOP];
-       xsize -= (intel_tv->margin[TV_MARGIN_LEFT] +
-                 intel_tv->margin[TV_MARGIN_RIGHT]);
-       ysize -= (intel_tv->margin[TV_MARGIN_TOP] +
-                 intel_tv->margin[TV_MARGIN_BOTTOM]);
+       xpos += conn_state->tv.margins.left;
+       ypos += conn_state->tv.margins.top;
+       xsize -= (conn_state->tv.margins.left +
+                 conn_state->tv.margins.right);
+       ysize -= (conn_state->tv.margins.top +
+                 conn_state->tv.margins.bottom);
        I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);
        I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize);
 
@@ -1288,7 +1238,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv,
 static void intel_tv_find_better_format(struct drm_connector *connector)
 {
        struct intel_tv *intel_tv = intel_attached_tv(connector);
-       const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+       const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
        int i;
 
        if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) ==
@@ -1304,9 +1254,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
                        break;
        }
 
-       intel_tv->tv_format = tv_mode->name;
-       drm_object_property_set_value(&connector->base,
-               connector->dev->mode_config.tv_mode_property, i);
+       connector->state->tv.mode = i;
 }
 
 /**
@@ -1347,16 +1295,15 @@ intel_tv_detect(struct drm_connector *connector,
                                connector_status_connected;
                } else
                        status = connector_status_unknown;
-       } else
-               return connector->status;
 
-       if (status != connector_status_connected)
-               return status;
-
-       intel_tv->type = type;
-       intel_tv_find_better_format(connector);
+               if (status == connector_status_connected) {
+                       intel_tv->type = type;
+                       intel_tv_find_better_format(connector);
+               }
 
-       return connector_status_connected;
+               return status;
+       } else
+               return connector->status;
 }
 
 static const struct input_res {
@@ -1376,12 +1323,9 @@ static const struct input_res {
  * Choose preferred mode according to line number of TV format
  */
 static void
-intel_tv_chose_preferred_modes(struct drm_connector *connector,
+intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode,
                               struct drm_display_mode *mode_ptr)
 {
-       struct intel_tv *intel_tv = intel_attached_tv(connector);
-       const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
-
        if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480)
                mode_ptr->type |= DRM_MODE_TYPE_PREFERRED;
        else if (tv_mode->nbr_end > 480) {
@@ -1404,8 +1348,7 @@ static int
 intel_tv_get_modes(struct drm_connector *connector)
 {
        struct drm_display_mode *mode_ptr;
-       struct intel_tv *intel_tv = intel_attached_tv(connector);
-       const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+       const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
        int j, count = 0;
        u64 tmp;
 
@@ -1448,7 +1391,7 @@ intel_tv_get_modes(struct drm_connector *connector)
                mode_ptr->clock = (int) tmp;
 
                mode_ptr->type = DRM_MODE_TYPE_DRIVER;
-               intel_tv_chose_preferred_modes(connector, mode_ptr);
+               intel_tv_choose_preferred_modes(tv_mode, mode_ptr);
                drm_mode_probed_add(connector, mode_ptr);
                count++;
        }
@@ -1463,74 +1406,47 @@ intel_tv_destroy(struct drm_connector *connector)
        kfree(connector);
 }
 
-
-static int
-intel_tv_set_property(struct drm_connector *connector, struct drm_property *property,
-                     uint64_t val)
-{
-       struct drm_device *dev = connector->dev;
-       struct intel_tv *intel_tv = intel_attached_tv(connector);
-       struct drm_crtc *crtc = intel_tv->base.base.crtc;
-       int ret = 0;
-       bool changed = false;
-
-       ret = drm_object_property_set_value(&connector->base, property, val);
-       if (ret < 0)
-               goto out;
-
-       if (property == dev->mode_config.tv_left_margin_property &&
-               intel_tv->margin[TV_MARGIN_LEFT] != val) {
-               intel_tv->margin[TV_MARGIN_LEFT] = val;
-               changed = true;
-       } else if (property == dev->mode_config.tv_right_margin_property &&
-               intel_tv->margin[TV_MARGIN_RIGHT] != val) {
-               intel_tv->margin[TV_MARGIN_RIGHT] = val;
-               changed = true;
-       } else if (property == dev->mode_config.tv_top_margin_property &&
-               intel_tv->margin[TV_MARGIN_TOP] != val) {
-               intel_tv->margin[TV_MARGIN_TOP] = val;
-               changed = true;
-       } else if (property == dev->mode_config.tv_bottom_margin_property &&
-               intel_tv->margin[TV_MARGIN_BOTTOM] != val) {
-               intel_tv->margin[TV_MARGIN_BOTTOM] = val;
-               changed = true;
-       } else if (property == dev->mode_config.tv_mode_property) {
-               if (val >= ARRAY_SIZE(tv_modes)) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-               if (!strcmp(intel_tv->tv_format, tv_modes[val].name))
-                       goto out;
-
-               intel_tv->tv_format = tv_modes[val].name;
-               changed = true;
-       } else {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       if (changed && crtc)
-               intel_crtc_restore_mode(crtc);
-out:
-       return ret;
-}
-
 static const struct drm_connector_funcs intel_tv_connector_funcs = {
        .dpms = drm_atomic_helper_connector_dpms,
        .late_register = intel_connector_register,
        .early_unregister = intel_connector_unregister,
        .destroy = intel_tv_destroy,
-       .set_property = intel_tv_set_property,
-       .atomic_get_property = intel_connector_atomic_get_property,
+       .set_property = drm_atomic_helper_connector_set_property,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
        .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 };
 
+static int intel_tv_atomic_check(struct drm_connector *connector,
+                                struct drm_connector_state *new_state)
+{
+       struct drm_crtc_state *new_crtc_state;
+       struct drm_connector_state *old_state;
+
+       if (!new_state->crtc)
+               return 0;
+
+       old_state = drm_atomic_get_old_connector_state(new_state->state, connector);
+       new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc);
+
+       if (old_state->tv.mode != new_state->tv.mode ||
+           old_state->tv.margins.left != new_state->tv.margins.left ||
+           old_state->tv.margins.right != new_state->tv.margins.right ||
+           old_state->tv.margins.top != new_state->tv.margins.top ||
+           old_state->tv.margins.bottom != new_state->tv.margins.bottom) {
+               /* Force a modeset. */
+
+               new_crtc_state->connectors_changed = true;
+       }
+
+       return 0;
+}
+
 static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = {
        .detect_ctx = intel_tv_detect,
        .mode_valid = intel_tv_mode_valid,
        .get_modes = intel_tv_get_modes,
+       .atomic_check = intel_tv_atomic_check,
 };
 
 static const struct drm_encoder_funcs intel_tv_enc_funcs = {
@@ -1548,6 +1464,7 @@ intel_tv_init(struct drm_i915_private *dev_priv)
        u32 tv_dac_on, tv_dac_off, save_tv_dac;
        const char *tv_format_names[ARRAY_SIZE(tv_modes)];
        int i, initial_mode = 0;
+       struct drm_connector_state *state;
 
        if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED)
                return;
@@ -1593,6 +1510,7 @@ intel_tv_init(struct drm_i915_private *dev_priv)
 
        intel_encoder = &intel_tv->base;
        connector = &intel_connector->base;
+       state = connector->state;
 
        /* The documentation, for the older chipsets at least, recommends
         * using a polling method rather than hotplug detection for TVs.
@@ -1630,12 +1548,12 @@ intel_tv_init(struct drm_i915_private *dev_priv)
        intel_tv->type = DRM_MODE_CONNECTOR_Unknown;
 
        /* BIOS margin values */
-       intel_tv->margin[TV_MARGIN_LEFT] = 54;
-       intel_tv->margin[TV_MARGIN_TOP] = 36;
-       intel_tv->margin[TV_MARGIN_RIGHT] = 46;
-       intel_tv->margin[TV_MARGIN_BOTTOM] = 37;
+       state->tv.margins.left = 54;
+       state->tv.margins.top = 36;
+       state->tv.margins.right = 46;
+       state->tv.margins.bottom = 37;
 
-       intel_tv->tv_format = tv_modes[initial_mode].name;
+       state->tv.mode = initial_mode;
 
        drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs);
        connector->interlace_allowed = false;
@@ -1649,17 +1567,17 @@ intel_tv_init(struct drm_i915_private *dev_priv)
                                      tv_format_names);
 
        drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property,
-                                  initial_mode);
+                                  state->tv.mode);
        drm_object_attach_property(&connector->base,
                                   dev->mode_config.tv_left_margin_property,
-                                  intel_tv->margin[TV_MARGIN_LEFT]);
+                                  state->tv.margins.left);
        drm_object_attach_property(&connector->base,
                                   dev->mode_config.tv_top_margin_property,
-                                  intel_tv->margin[TV_MARGIN_TOP]);
+                                  state->tv.margins.top);
        drm_object_attach_property(&connector->base,
                                   dev->mode_config.tv_right_margin_property,
-                                  intel_tv->margin[TV_MARGIN_RIGHT]);
+                                  state->tv.margins.right);
        drm_object_attach_property(&connector->base,
                                   dev->mode_config.tv_bottom_margin_property,
-                                  intel_tv->margin[TV_MARGIN_BOTTOM]);
+                                  state->tv.margins.bottom);
 }
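
With the TV format and margins moved into the atomic connector state, the window sizing in intel_tv_pre_enable() reduces to reading conn_state->tv directly. A minimal sketch of that arithmetic under the same assumptions as the hunks above (the helper itself is illustrative, not in the patch):

static void sketch_tv_apply_margins(const struct drm_connector_state *conn_state,
                                    int *xpos, int *ypos,
                                    int *xsize, int *ysize)
{
        /* Shrink the active window by the user-configured TV margins */
        *xpos += conn_state->tv.margins.left;
        *ypos += conn_state->tv.margins.top;
        *xsize -= conn_state->tv.margins.left + conn_state->tv.margins.right;
        *ysize -= conn_state->tv.margins.top + conn_state->tv.margins.bottom;
}
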
index c117424f1f50e1a4bde7d5467fb0a4d31ac9d6e8..7a7b07de28a3dd9e2c63e6d77dba5ccc50f41e08 100644 (file)
@@ -94,12 +94,22 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
                i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv);
 }
 
+static void guc_write_irq_trigger(struct intel_guc *guc)
+{
+       struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+       I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
+}
+
 void intel_uc_init_early(struct drm_i915_private *dev_priv)
 {
        struct intel_guc *guc = &dev_priv->guc;
 
+       intel_guc_ct_init_early(&guc->ct);
+
        mutex_init(&guc->send_mutex);
-       guc->send = intel_guc_send_mmio;
+       guc->send = intel_guc_send_nop;
+       guc->notify = guc_write_irq_trigger;
 }
 
 static void fetch_uc_fw(struct drm_i915_private *dev_priv,
@@ -252,13 +262,81 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
        __intel_uc_fw_fini(&dev_priv->huc.fw);
 }
 
+static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
+{
+       GEM_BUG_ON(!guc->send_regs.base);
+       GEM_BUG_ON(!guc->send_regs.count);
+       GEM_BUG_ON(i >= guc->send_regs.count);
+
+       return _MMIO(guc->send_regs.base + 4 * i);
+}
+
+static void guc_init_send_regs(struct intel_guc *guc)
+{
+       struct drm_i915_private *dev_priv = guc_to_i915(guc);
+       enum forcewake_domains fw_domains = 0;
+       unsigned int i;
+
+       guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
+       guc->send_regs.count = SOFT_SCRATCH_COUNT - 1;
+
+       for (i = 0; i < guc->send_regs.count; i++) {
+               fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
+                                       guc_send_reg(guc, i),
+                                       FW_REG_READ | FW_REG_WRITE);
+       }
+       guc->send_regs.fw_domains = fw_domains;
+}
+
+static void guc_capture_load_err_log(struct intel_guc *guc)
+{
+       if (!guc->log.vma || i915.guc_log_level < 0)
+               return;
+
+       if (!guc->load_err_log)
+               guc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
+
+       return;
+}
+
+static void guc_free_load_err_log(struct intel_guc *guc)
+{
+       if (guc->load_err_log)
+               i915_gem_object_put(guc->load_err_log);
+}
+
+static int guc_enable_communication(struct intel_guc *guc)
+{
+       struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+       guc_init_send_regs(guc);
+
+       if (HAS_GUC_CT(dev_priv))
+               return intel_guc_enable_ct(guc);
+
+       guc->send = intel_guc_send_mmio;
+       return 0;
+}
+
+static void guc_disable_communication(struct intel_guc *guc)
+{
+       struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+       if (HAS_GUC_CT(dev_priv))
+               intel_guc_disable_ct(guc);
+
+       guc->send = intel_guc_send_nop;
+}
+
 int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 {
+       struct intel_guc *guc = &dev_priv->guc;
        int ret, attempts;
 
        if (!i915.enable_guc_loading)
                return 0;
 
+       guc_disable_communication(guc);
        gen9_reset_guc_interrupts(dev_priv);
 
        /* We need to notify the guc whenever we change the GGTT */
@@ -274,6 +352,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
                        goto err_guc;
        }
 
+       /* init WOPCM */
+       I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
+       I915_WRITE(DMA_GUC_WOPCM_OFFSET,
+                  GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
+
        /* WaEnableuKernelHeaderValidFix:skl */
        /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
        if (IS_GEN9(dev_priv))
@@ -301,7 +384,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
        /* Did we succeed or run out of retries? */
        if (ret)
-               goto err_submission;
+               goto err_log_capture;
+
+       ret = guc_enable_communication(guc);
+       if (ret)
+               goto err_log_capture;
 
        intel_guc_auth_huc(dev_priv);
        if (i915.enable_guc_submission) {
@@ -325,7 +412,10 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
         * marks the GPU as wedged until reset).
         */
 err_interrupts:
+       guc_disable_communication(guc);
        gen9_disable_guc_interrupts(dev_priv);
+err_log_capture:
+       guc_capture_load_err_log(guc);
 err_submission:
        if (i915.enable_guc_submission)
                i915_guc_submission_fini(dev_priv);
@@ -351,25 +441,25 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
        if (!i915.enable_guc_loading)
                return;
 
-       if (i915.enable_guc_submission) {
+       guc_free_load_err_log(&dev_priv->guc);
+
+       if (i915.enable_guc_submission)
                i915_guc_submission_disable(dev_priv);
+
+       guc_disable_communication(&dev_priv->guc);
+
+       if (i915.enable_guc_submission) {
                gen9_disable_guc_interrupts(dev_priv);
                i915_guc_submission_fini(dev_priv);
        }
+
        i915_ggtt_disable_guc(dev_priv);
 }
 
-/*
- * Read GuC command/status register (SOFT_SCRATCH_0)
- * Return true if it contains a response rather than a command
- */
-static bool guc_recv(struct intel_guc *guc, u32 *status)
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
 {
-       struct drm_i915_private *dev_priv = guc_to_i915(guc);
-
-       u32 val = I915_READ(SOFT_SCRATCH(0));
-       *status = val;
-       return INTEL_GUC_RECV_IS_RESPONSE(val);
+       WARN(1, "Unexpected send: action=%#x\n", *action);
+       return -ENODEV;
 }
 
 /*
@@ -382,30 +472,33 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
        int i;
        int ret;
 
-       if (WARN_ON(len < 1 || len > 15))
-               return -EINVAL;
+       GEM_BUG_ON(!len);
+       GEM_BUG_ON(len > guc->send_regs.count);
 
-       mutex_lock(&guc->send_mutex);
-       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER);
+       /* If CT is available, we expect to use MMIO only during init/fini */
+       GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
+               *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
+               *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
 
-       dev_priv->guc.action_count += 1;
-       dev_priv->guc.action_cmd = action[0];
+       mutex_lock(&guc->send_mutex);
+       intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
 
        for (i = 0; i < len; i++)
-               I915_WRITE(SOFT_SCRATCH(i), action[i]);
+               I915_WRITE(guc_send_reg(guc, i), action[i]);
 
-       POSTING_READ(SOFT_SCRATCH(i - 1));
+       POSTING_READ(guc_send_reg(guc, i - 1));
 
-       I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
+       intel_guc_notify(guc);
 
        /*
-        * Fast commands should complete in less than 10us, so sample quickly
-        * up to that length of time, then switch to a slower sleep-wait loop.
-        * No inte_guc_send command should ever take longer than 10ms.
+        * No GuC command should ever take longer than 10ms.
+        * Fast commands should still complete in 10us.
         */
-       ret = wait_for_us(guc_recv(guc, &status), 10);
-       if (ret)
-               ret = wait_for(guc_recv(guc, &status), 10);
+       ret = __intel_wait_for_register_fw(dev_priv,
+                                          guc_send_reg(guc, 0),
+                                          INTEL_GUC_RECV_MASK,
+                                          INTEL_GUC_RECV_MASK,
+                                          10, 10, &status);
        if (status != INTEL_GUC_STATUS_SUCCESS) {
                /*
                 * Either the GuC explicitly returned an error (which
@@ -418,13 +511,9 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
                DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
                         " ret=%d status=0x%08X response=0x%08X\n",
                         action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
-
-               dev_priv->guc.action_fail += 1;
-               dev_priv->guc.action_err = ret;
        }
-       dev_priv->guc.action_status = status;
 
-       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER);
+       intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains);
        mutex_unlock(&guc->send_mutex);
 
        return ret;
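
Callers do not pick a transport themselves: they build a u32 action array and go through intel_guc_send(), which dispatches to whichever backend is currently installed (the nop stub before hardware init, MMIO scratch registers or the CT buffer once communication is enabled). A hedged caller-side sketch, loosely modelled on intel_guc_sample_forcewake(); the action id and payload bits are assumptions for illustration only:

static int sketch_guc_request(struct intel_guc *guc)
{
        u32 action[2];

        action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;  /* assumed action id */
        action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;  /* assumed payload */

        /* The MMIO backend serializes this internally via guc->send_mutex */
        return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
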
index 4b7f73aeddac6475db31d184853f833c8ba3d510..69daf4c01cd0ed676842955d5cc7aee2e42e3917 100644 (file)
@@ -27,7 +27,7 @@
 #include "intel_guc_fwif.h"
 #include "i915_guc_reg.h"
 #include "intel_ringbuffer.h"
-
+#include "intel_guc_ct.h"
 #include "i915_vma.h"
 
 struct drm_i915_gem_request;
@@ -59,12 +59,6 @@ struct drm_i915_gem_request;
  *                available in the work queue (note, the queue is shared,
  *                not per-engine). It is OK for this to be nonzero, but
  *                it should not be huge!
- *   q_fail: failed to enqueue a work item. This should never happen,
- *           because we check for space beforehand.
- *   b_fail: failed to ring the doorbell. This should never happen, unless
- *           somehow the hardware misbehaves, or maybe if the GuC firmware
- *           crashes? We probably need to reset the GPU to recover.
- *   retcode: errno from last guc_submit()
  */
 struct i915_guc_client {
        struct i915_vma *vma;
@@ -87,8 +81,6 @@ struct i915_guc_client {
        uint32_t wq_tail;
        uint32_t wq_rsvd;
        uint32_t no_wq_space;
-       uint32_t b_fail;
-       int retcode;
 
        /* Per-engine counts of GuC submissions */
        uint64_t submissions[I915_NUM_ENGINES];
@@ -181,6 +173,10 @@ struct intel_guc_log {
 struct intel_guc {
        struct intel_uc_fw fw;
        struct intel_guc_log log;
+       struct intel_guc_ct ct;
+
+       /* Log snapshot if GuC errors during load */
+       struct drm_i915_gem_object *load_err_log;
 
        /* intel_guc_recv interrupt related state */
        bool interrupts_enabled;
@@ -195,21 +191,21 @@ struct intel_guc {
        DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
        uint32_t db_cacheline;          /* Cyclic counter mod pagesize  */
 
-       /* Action status & statistics */
-       uint64_t action_count;          /* Total commands issued        */
-       uint32_t action_cmd;            /* Last command word            */
-       uint32_t action_status;         /* Last return status           */
-       uint32_t action_fail;           /* Total number of failures     */
-       int32_t action_err;             /* Last error code              */
-
-       uint64_t submissions[I915_NUM_ENGINES];
-       uint32_t last_seqno[I915_NUM_ENGINES];
+       /* GuC's FW specific registers used in MMIO send */
+       struct {
+               u32 base;
+               unsigned int count;
+               enum forcewake_domains fw_domains;
+       } send_regs;
 
        /* To serialize the intel_guc_send actions */
        struct mutex send_mutex;
 
        /* GuC's FW specific send function */
        int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
+
+       /* GuC's FW specific notify function */
+       void (*notify)(struct intel_guc *guc);
 };
 
 struct intel_huc {
@@ -227,12 +223,19 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
 int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
+
 static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 {
        return guc->send(guc, action, len);
 }
 
+static inline void intel_guc_notify(struct intel_guc *guc)
+{
+       guc->notify(guc);
+}
+
 /* intel_guc_loader.c */
 int intel_guc_select_fw(struct intel_guc *guc);
 int intel_guc_init_hw(struct intel_guc *guc);
@@ -266,7 +269,7 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 
 /* intel_huc.c */
 void intel_huc_select_fw(struct intel_huc *huc);
-int intel_huc_init_hw(struct intel_huc *huc);
+void intel_huc_init_hw(struct intel_huc *huc);
 void intel_guc_auth_huc(struct drm_i915_private *dev_priv);
 
 #endif
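
The header now models GuC communication as two per-platform hooks on struct intel_guc: send() for the actual transport and notify() for ringing the doorbell interrupt. A minimal sketch of how a backend gets wired up, mirroring intel_uc_init_early()/guc_enable_communication() above (the function below is illustrative only):

static void sketch_guc_wire_backend(struct intel_guc *guc, bool hw_ready, bool has_ct)
{
        guc->notify = guc_write_irq_trigger;            /* platform doorbell */

        if (!hw_ready)
                guc->send = intel_guc_send_nop;         /* reject early sends */
        else if (!has_ct)
                guc->send = intel_guc_send_mmio;        /* scratch-register path */
        /* else: intel_guc_enable_ct() installs the CT-buffer send hook */
}
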
index 6d1ea26b2493baba7ac075b2594b6a6fae20b060..47d7ee1b5d864c679cea97821d3d2e2f6a1e4915 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/pm_runtime.h>
 
 #define FORCEWAKE_ACK_TIMEOUT_MS 50
+#define GT_FIFO_TIMEOUT_MS      10
 
 #define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__))
 
@@ -172,22 +173,6 @@ static void fw_domains_get_with_thread_status(struct drm_i915_private *dev_priv,
        __gen6_gt_wait_for_thread_c0(dev_priv);
 }
 
-static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
-{
-       u32 gtfifodbg;
-
-       gtfifodbg = __raw_i915_read32(dev_priv, GTFIFODBG);
-       if (WARN(gtfifodbg, "GT wake FIFO error 0x%x\n", gtfifodbg))
-               __raw_i915_write32(dev_priv, GTFIFODBG, gtfifodbg);
-}
-
-static void fw_domains_put_with_fifo(struct drm_i915_private *dev_priv,
-                                    enum forcewake_domains fw_domains)
-{
-       fw_domains_put(dev_priv, fw_domains);
-       gen6_gt_check_fifodbg(dev_priv);
-}
-
 static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv)
 {
        u32 count = __raw_i915_read32(dev_priv, GTFIFOCTL);
@@ -195,30 +180,27 @@ static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv)
        return count & GT_FIFO_FREE_ENTRIES_MASK;
 }
 
-static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
+static void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
-       int ret = 0;
+       u32 n;
 
        /* On VLV, FIFO will be shared by both SW and HW.
         * So, we need to read the FREE_ENTRIES every time */
        if (IS_VALLEYVIEW(dev_priv))
-               dev_priv->uncore.fifo_count = fifo_free_entries(dev_priv);
-
-       if (dev_priv->uncore.fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
-               int loop = 500;
-               u32 fifo = fifo_free_entries(dev_priv);
-
-               while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
-                       udelay(10);
-                       fifo = fifo_free_entries(dev_priv);
+               n = fifo_free_entries(dev_priv);
+       else
+               n = dev_priv->uncore.fifo_count;
+
+       if (n <= GT_FIFO_NUM_RESERVED_ENTRIES) {
+               if (wait_for_atomic((n = fifo_free_entries(dev_priv)) >
+                                   GT_FIFO_NUM_RESERVED_ENTRIES,
+                                   GT_FIFO_TIMEOUT_MS)) {
+                       DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n);
+                       return;
                }
-               if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES))
-                       ++ret;
-               dev_priv->uncore.fifo_count = fifo;
        }
-       dev_priv->uncore.fifo_count--;
 
-       return ret;
+       dev_priv->uncore.fifo_count = n - 1;
 }
 
 static enum hrtimer_restart
@@ -232,6 +214,9 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
 
        assert_rpm_device_not_suspended(dev_priv);
 
+       if (xchg(&domain->active, false))
+               return HRTIMER_RESTART;
+
        spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
        if (WARN_ON(domain->wake_count == 0))
                domain->wake_count++;
@@ -262,6 +247,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,
                active_domains = 0;
 
                for_each_fw_domain(domain, dev_priv, tmp) {
+                       smp_store_mb(domain->active, false);
                        if (hrtimer_cancel(&domain->timer) == 0)
                                continue;
 
@@ -383,16 +369,36 @@ vlv_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
        return true;
 }
 
+static bool
+gen6_check_for_fifo_debug(struct drm_i915_private *dev_priv)
+{
+       u32 fifodbg;
+
+       fifodbg = __raw_i915_read32(dev_priv, GTFIFODBG);
+
+       if (unlikely(fifodbg)) {
+               DRM_DEBUG_DRIVER("GTFIFODBG = 0x%08x\n", fifodbg);
+               __raw_i915_write32(dev_priv, GTFIFODBG, fifodbg);
+       }
+
+       return fifodbg;
+}
+
 static bool
 check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
 {
+       bool ret = false;
+
        if (HAS_FPGA_DBG_UNCLAIMED(dev_priv))
-               return fpga_check_for_unclaimed_mmio(dev_priv);
+               ret |= fpga_check_for_unclaimed_mmio(dev_priv);
 
        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-               return vlv_check_for_unclaimed_mmio(dev_priv);
+               ret |= vlv_check_for_unclaimed_mmio(dev_priv);
 
-       return false;
+       if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv))
+               ret |= gen6_check_for_fifo_debug(dev_priv);
+
+       return ret;
 }
 
 static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
@@ -404,11 +410,6 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
        if (check_for_unclaimed_mmio(dev_priv))
                DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
 
-       /* clear out old GT FIFO errors */
-       if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv))
-               __raw_i915_write32(dev_priv, GTFIFODBG,
-                                  __raw_i915_read32(dev_priv, GTFIFODBG));
-
        /* WaDisableShadowRegForCpd:chv */
        if (IS_CHERRYVIEW(dev_priv)) {
                __raw_i915_write32(dev_priv, GTFIFOCTL,
@@ -454,9 +455,12 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
 
        fw_domains &= dev_priv->uncore.fw_domains;
 
-       for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp)
-               if (domain->wake_count++)
+       for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) {
+               if (domain->wake_count++) {
                        fw_domains &= ~domain->mask;
+                       domain->active = true;
+               }
+       }
 
        if (fw_domains)
                dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
@@ -521,8 +525,10 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
                if (WARN_ON(domain->wake_count == 0))
                        continue;
 
-               if (--domain->wake_count)
+               if (--domain->wake_count) {
+                       domain->active = true;
                        continue;
+               }
 
                fw_domain_arm_timer(domain);
        }
@@ -804,6 +810,18 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
        __unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+enum decoupled_power_domain {
+       GEN9_DECOUPLED_PD_BLITTER = 0,
+       GEN9_DECOUPLED_PD_RENDER,
+       GEN9_DECOUPLED_PD_MEDIA,
+       GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+       GEN9_DECOUPLED_OP_WRITE = 0,
+       GEN9_DECOUPLED_OP_READ
+};
+
 static const enum decoupled_power_domain fw2dpd_domain[] = {
        GEN9_DECOUPLED_PD_RENDER,
        GEN9_DECOUPLED_PD_BLITTER,
@@ -1047,15 +1065,10 @@ __gen2_write(32)
 #define __gen6_write(x) \
 static void \
 gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
-       u32 __fifo_ret = 0; \
        GEN6_WRITE_HEADER; \
-       if (NEEDS_FORCE_WAKE(offset)) { \
-               __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
-       } \
+       if (NEEDS_FORCE_WAKE(offset)) \
+               __gen6_gt_wait_for_fifo(dev_priv); \
        __raw_i915_write##x(dev_priv, reg, val); \
-       if (unlikely(__fifo_ret)) { \
-               gen6_gt_check_fifodbg(dev_priv); \
-       } \
        GEN6_WRITE_FOOTER; \
 }
 
@@ -1108,19 +1121,19 @@ __gen6_write(32)
 #undef GEN6_WRITE_FOOTER
 #undef GEN6_WRITE_HEADER
 
-#define ASSIGN_WRITE_MMIO_VFUNCS(x) \
+#define ASSIGN_WRITE_MMIO_VFUNCS(i915, x) \
 do { \
-       dev_priv->uncore.funcs.mmio_writeb = x##_write8; \
-       dev_priv->uncore.funcs.mmio_writew = x##_write16; \
-       dev_priv->uncore.funcs.mmio_writel = x##_write32; \
+       (i915)->uncore.funcs.mmio_writeb = x##_write8; \
+       (i915)->uncore.funcs.mmio_writew = x##_write16; \
+       (i915)->uncore.funcs.mmio_writel = x##_write32; \
 } while (0)
 
-#define ASSIGN_READ_MMIO_VFUNCS(x) \
+#define ASSIGN_READ_MMIO_VFUNCS(i915, x) \
 do { \
-       dev_priv->uncore.funcs.mmio_readb = x##_read8; \
-       dev_priv->uncore.funcs.mmio_readw = x##_read16; \
-       dev_priv->uncore.funcs.mmio_readl = x##_read32; \
-       dev_priv->uncore.funcs.mmio_readq = x##_read64; \
+       (i915)->uncore.funcs.mmio_readb = x##_read8; \
+       (i915)->uncore.funcs.mmio_readw = x##_read16; \
+       (i915)->uncore.funcs.mmio_readl = x##_read32; \
+       (i915)->uncore.funcs.mmio_readq = x##_read64; \
 } while (0)
 
 
@@ -1190,11 +1203,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
                               FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9);
        } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
                dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
-               if (!IS_CHERRYVIEW(dev_priv))
-                       dev_priv->uncore.funcs.force_wake_put =
-                               fw_domains_put_with_fifo;
-               else
-                       dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
+               dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
                fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
                               FORCEWAKE_VLV, FORCEWAKE_ACK_VLV);
                fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA,
@@ -1202,11 +1211,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
        } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
                dev_priv->uncore.funcs.force_wake_get =
                        fw_domains_get_with_thread_status;
-               if (IS_HASWELL(dev_priv))
-                       dev_priv->uncore.funcs.force_wake_put =
-                               fw_domains_put_with_fifo;
-               else
-                       dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
+               dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
                fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
                               FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
        } else if (IS_IVYBRIDGE(dev_priv)) {
@@ -1223,8 +1228,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
                 */
                dev_priv->uncore.funcs.force_wake_get =
                        fw_domains_get_with_thread_status;
-               dev_priv->uncore.funcs.force_wake_put =
-                       fw_domains_put_with_fifo;
+               dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 
                /* We need to init first for ECOBUS access and then
                 * determine later if we want to reinit, in case of MT access is
@@ -1242,7 +1246,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
                spin_lock_irq(&dev_priv->uncore.lock);
                fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER);
                ecobus = __raw_i915_read32(dev_priv, ECOBUS);
-               fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER);
+               fw_domains_put(dev_priv, FORCEWAKE_RENDER);
                spin_unlock_irq(&dev_priv->uncore.lock);
 
                if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
@@ -1254,8 +1258,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
        } else if (IS_GEN6(dev_priv)) {
                dev_priv->uncore.funcs.force_wake_get =
                        fw_domains_get_with_thread_status;
-               dev_priv->uncore.funcs.force_wake_put =
-                       fw_domains_put_with_fifo;
+               dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
                fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
                               FORCEWAKE, FORCEWAKE_ACK);
        }
@@ -1310,34 +1313,34 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
                i915_pmic_bus_access_notifier;
 
        if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) {
-               ASSIGN_WRITE_MMIO_VFUNCS(gen2);
-               ASSIGN_READ_MMIO_VFUNCS(gen2);
+               ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen2);
+               ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen2);
        } else if (IS_GEN5(dev_priv)) {
-               ASSIGN_WRITE_MMIO_VFUNCS(gen5);
-               ASSIGN_READ_MMIO_VFUNCS(gen5);
+               ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen5);
+               ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen5);
        } else if (IS_GEN(dev_priv, 6, 7)) {
-               ASSIGN_WRITE_MMIO_VFUNCS(gen6);
+               ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen6);
 
                if (IS_VALLEYVIEW(dev_priv)) {
                        ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges);
-                       ASSIGN_READ_MMIO_VFUNCS(fwtable);
+                       ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
                } else {
-                       ASSIGN_READ_MMIO_VFUNCS(gen6);
+                       ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6);
                }
        } else if (IS_GEN8(dev_priv)) {
                if (IS_CHERRYVIEW(dev_priv)) {
                        ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges);
-                       ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
-                       ASSIGN_READ_MMIO_VFUNCS(fwtable);
+                       ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable);
+                       ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
 
                } else {
-                       ASSIGN_WRITE_MMIO_VFUNCS(gen8);
-                       ASSIGN_READ_MMIO_VFUNCS(gen6);
+                       ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8);
+                       ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6);
                }
        } else {
                ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
-               ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
-               ASSIGN_READ_MMIO_VFUNCS(fwtable);
+               ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable);
+               ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
                if (HAS_DECOUPLED_MMIO(dev_priv)) {
                        dev_priv->uncore.funcs.mmio_readl =
                                                gen9_decoupled_read32;
@@ -1353,8 +1356,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 
        i915_check_and_clear_faults(dev_priv);
 }
-#undef ASSIGN_WRITE_MMIO_VFUNCS
-#undef ASSIGN_READ_MMIO_VFUNCS
 
 void intel_uncore_fini(struct drm_i915_private *dev_priv)
 {
@@ -1435,9 +1436,39 @@ out:
        return ret;
 }
 
-static int i915_reset_complete(struct pci_dev *pdev)
+static void gen3_stop_rings(struct drm_i915_private *dev_priv)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       for_each_engine(engine, dev_priv, id) {
+               const u32 base = engine->mmio_base;
+               const i915_reg_t mode = RING_MI_MODE(base);
+
+               I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
+               if (intel_wait_for_register_fw(dev_priv,
+                                              mode,
+                                              MODE_IDLE,
+                                              MODE_IDLE,
+                                              500))
+                       DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n",
+                                        engine->name);
+
+               I915_WRITE_FW(RING_CTL(base), 0);
+               I915_WRITE_FW(RING_HEAD(base), 0);
+               I915_WRITE_FW(RING_TAIL(base), 0);
+
+               /* The check also acts as a posting read */
+               if (I915_READ_FW(RING_HEAD(base)) != 0)
+                       DRM_DEBUG_DRIVER("%s: ring head not parked\n",
+                                        engine->name);
+       }
+}
+
+static bool i915_reset_complete(struct pci_dev *pdev)
 {
        u8 gdrst;
+
        pci_read_config_byte(pdev, I915_GDRST, &gdrst);
        return (gdrst & GRDOM_RESET_STATUS) == 0;
 }
@@ -1448,15 +1479,16 @@ static int i915_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask
 
        /* assert reset for at least 20 usec */
        pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-       udelay(20);
+       usleep_range(50, 200);
        pci_write_config_byte(pdev, I915_GDRST, 0);
 
        return wait_for(i915_reset_complete(pdev), 500);
 }
 
-static int g4x_reset_complete(struct pci_dev *pdev)
+static bool g4x_reset_complete(struct pci_dev *pdev)
 {
        u8 gdrst;
+
        pci_read_config_byte(pdev, I915_GDRST, &gdrst);
        return (gdrst & GRDOM_RESET_ENABLE) == 0;
 }
@@ -1464,6 +1496,10 @@ static int g4x_reset_complete(struct pci_dev *pdev)
 static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 {
        struct pci_dev *pdev = dev_priv->drm.pdev;
+
+       /* Stop engines before we reset; see g4x_do_reset() below for why. */
+       gen3_stop_rings(dev_priv);
+
        pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
        return wait_for(g4x_reset_complete(pdev), 500);
 }
@@ -1473,29 +1509,41 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
        struct pci_dev *pdev = dev_priv->drm.pdev;
        int ret;
 
-       pci_write_config_byte(pdev, I915_GDRST,
-                             GRDOM_RENDER | GRDOM_RESET_ENABLE);
-       ret =  wait_for(g4x_reset_complete(pdev), 500);
-       if (ret)
-               return ret;
-
        /* WaVcpClkGateDisableForMediaReset:ctg,elk */
-       I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
+       I915_WRITE(VDECCLK_GATE_D,
+                  I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
        POSTING_READ(VDECCLK_GATE_D);
 
+       /* We stop the engines first, otherwise we might get a failed reset and a
+        * dead gpu (on elk).
+        * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+        */
+       gen3_stop_rings(dev_priv);
+
        pci_write_config_byte(pdev, I915_GDRST,
                              GRDOM_MEDIA | GRDOM_RESET_ENABLE);
        ret =  wait_for(g4x_reset_complete(pdev), 500);
-       if (ret)
-               return ret;
+       if (ret) {
+               DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+               goto out;
+       }
 
-       /* WaVcpClkGateDisableForMediaReset:ctg,elk */
-       I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
-       POSTING_READ(VDECCLK_GATE_D);
+       pci_write_config_byte(pdev, I915_GDRST,
+                             GRDOM_RENDER | GRDOM_RESET_ENABLE);
+       ret =  wait_for(g4x_reset_complete(pdev), 500);
+       if (ret) {
+               DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+               goto out;
+       }
 
+out:
        pci_write_config_byte(pdev, I915_GDRST, 0);
 
-       return 0;
+       I915_WRITE(VDECCLK_GATE_D,
+                  I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
+       POSTING_READ(VDECCLK_GATE_D);
+
+       return ret;
 }
 
 static int ironlake_do_reset(struct drm_i915_private *dev_priv,
@@ -1503,41 +1551,51 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv,
 {
        int ret;
 
-       I915_WRITE(ILK_GDSR,
-                  ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+       I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
        ret = intel_wait_for_register(dev_priv,
                                      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
                                      500);
-       if (ret)
-               return ret;
+       if (ret) {
+               DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+               goto out;
+       }
 
-       I915_WRITE(ILK_GDSR,
-                  ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+       I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
        ret = intel_wait_for_register(dev_priv,
                                      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
                                      500);
-       if (ret)
-               return ret;
+       if (ret) {
+               DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+               goto out;
+       }
 
+out:
        I915_WRITE(ILK_GDSR, 0);
-
-       return 0;
+       POSTING_READ(ILK_GDSR);
+       return ret;
 }
 
 /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
 static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
                                u32 hw_domain_mask)
 {
+       int err;
+
        /* GEN6_GDRST is not in the gt power well, no need to check
         * for fifo space for the write or forcewake the chip for
         * the read
         */
        __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
 
-       /* Spin waiting for the device to ack the reset requests */
-       return intel_wait_for_register_fw(dev_priv,
+       /* Wait for the device to ack the reset requests */
+       err = intel_wait_for_register_fw(dev_priv,
                                          GEN6_GDRST, hw_domain_mask, 0,
                                          500);
+       if (err)
+               DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
+                                hw_domain_mask);
+
+       return err;
 }
 
 /**
@@ -1585,19 +1643,23 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
 }
 
 /**
- * intel_wait_for_register_fw - wait until register matches expected state
+ * __intel_wait_for_register_fw - wait until register matches expected state
  * @dev_priv: the i915 device
  * @reg: the register to read
  * @mask: mask to apply to register value
  * @value: expected value
- * @timeout_ms: timeout in millisecond
+ * @fast_timeout_us: fast timeout in microseconds for atomic/tight wait
+ * @slow_timeout_ms: slow timeout in milliseconds
+ * @out_value: optional placeholder to hold the register value
  *
  * This routine waits until the target register @reg contains the expected
  * @value after applying the @mask, i.e. it waits until ::
  *
  *     (I915_READ_FW(reg) & mask) == value
  *
- * Otherwise, the wait will timeout after @timeout_ms milliseconds.
+ * Otherwise, the wait will time out after @slow_timeout_ms milliseconds.
+ * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
+ * must not be larger than 20,000 microseconds.
  *
  * Note that this routine assumes the caller holds forcewake asserted, it is
  * not suitable for very long waits. See intel_wait_for_register() if you
@@ -1606,16 +1668,31 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
  *
  * Returns 0 if the register matches the desired condition, or -ETIMEDOUT.
  */
-int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
-                              i915_reg_t reg,
-                              const u32 mask,
-                              const u32 value,
-                              const unsigned long timeout_ms)
-{
-#define done ((I915_READ_FW(reg) & mask) == value)
-       int ret = wait_for_us(done, 2);
-       if (ret)
-               ret = wait_for(done, timeout_ms);
+int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
+                                i915_reg_t reg,
+                                u32 mask,
+                                u32 value,
+                                unsigned int fast_timeout_us,
+                                unsigned int slow_timeout_ms,
+                                u32 *out_value)
+{
+       u32 uninitialized_var(reg_value);
+#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
+       int ret;
+
+       /* Catch any overuse of this function */
+       might_sleep_if(slow_timeout_ms);
+       GEM_BUG_ON(fast_timeout_us > 20000);
+
+       ret = -ETIMEDOUT;
+       if (fast_timeout_us && fast_timeout_us <= 20000)
+               ret = _wait_for_atomic(done, fast_timeout_us, 0);
+       if (ret && slow_timeout_ms)
+               ret = wait_for(done, slow_timeout_ms);
+
+       if (out_value)
+               *out_value = reg_value;
+
        return ret;
 #undef done
 }
@@ -1639,18 +1716,26 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
  */
 int intel_wait_for_register(struct drm_i915_private *dev_priv,
                            i915_reg_t reg,
-                           const u32 mask,
-                           const u32 value,
-                           const unsigned long timeout_ms)
+                           u32 mask,
+                           u32 value,
+                           unsigned int timeout_ms)
 {
-
        unsigned fw =
                intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
        int ret;
 
-       intel_uncore_forcewake_get(dev_priv, fw);
-       ret = wait_for_us((I915_READ_FW(reg) & mask) == value, 2);
-       intel_uncore_forcewake_put(dev_priv, fw);
+       might_sleep();
+
+       spin_lock_irq(&dev_priv->uncore.lock);
+       intel_uncore_forcewake_get__locked(dev_priv, fw);
+
+       ret = __intel_wait_for_register_fw(dev_priv,
+                                          reg, mask, value,
+                                          2, 0, NULL);
+
+       intel_uncore_forcewake_put__locked(dev_priv, fw);
+       spin_unlock_irq(&dev_priv->uncore.lock);
+
        if (ret)
                ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value,
                               timeout_ms);
@@ -1658,7 +1743,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv,
        return ret;
 }
 
-static int gen8_request_engine_reset(struct intel_engine_cs *engine)
+static int gen8_reset_engine_start(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
        int ret;
@@ -1677,7 +1762,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
        return ret;
 }
 
-static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine)
+static void gen8_reset_engine_cancel(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
 
@@ -1692,14 +1777,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
        unsigned int tmp;
 
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-               if (gen8_request_engine_reset(engine))
+               if (gen8_reset_engine_start(engine))
                        goto not_ready;
 
        return gen6_reset_engines(dev_priv, engine_mask);
 
 not_ready:
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-               gen8_unrequest_engine_reset(engine);
+               gen8_reset_engine_cancel(engine);
 
        return -EIO;
 }
@@ -1730,8 +1815,11 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
 int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 {
        reset_func reset;
+       int retry;
        int ret;
 
+       might_sleep();
+
        reset = intel_get_gpu_reset(dev_priv);
        if (reset == NULL)
                return -ENODEV;
@@ -1740,7 +1828,13 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
         * request may be dropped and never completes (causing -EIO).
         */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-       ret = reset(dev_priv, engine_mask);
+       for (retry = 0; retry < 3; retry++) {
+               ret = reset(dev_priv, engine_mask);
+               if (ret != -ETIMEDOUT)
+                       break;
+
+               cond_resched();
+       }
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
        return ret;
@@ -1754,17 +1848,12 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
 int intel_guc_reset(struct drm_i915_private *dev_priv)
 {
        int ret;
-       unsigned long irqflags;
 
        if (!HAS_GUC(dev_priv))
                return -EINVAL;
 
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-       spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
        ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
-
-       spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
        return ret;
@@ -1873,5 +1962,6 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_uncore.c"
 #include "selftests/intel_uncore.c"
 #endif
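For reference, a minimal caller sketch of the reworked wait helper above (illustrative only, not part of the patch; example_status_reg and example_busy_bit are hypothetical): the fast window is polled atomically for up to 2us and, failing that, the helper sleeps for up to the slow timeout, returning the last observed register value through out_value.

/*
 * Illustrative sketch: wait for a hypothetical busy bit to clear, busy-waiting
 * for up to 2us and then sleeping for up to 10ms. The _fw helper assumes the
 * caller already holds the required forcewake.
 */
static int example_wait_for_idle(struct drm_i915_private *dev_priv,
                                 i915_reg_t example_status_reg,
                                 u32 example_busy_bit)
{
        u32 last;
        int err;

        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
        err = __intel_wait_for_register_fw(dev_priv, example_status_reg,
                                           example_busy_bit, 0,
                                           2, 10, &last);
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

        if (err)
                DRM_DEBUG_DRIVER("register still busy, last value 0x%08x\n", last);

        return err;
}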
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
new file mode 100644 (file)
index 0000000..5f90278
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __INTEL_UNCORE_H__
+#define __INTEL_UNCORE_H__
+
+struct drm_i915_private;
+
+enum forcewake_domain_id {
+       FW_DOMAIN_ID_RENDER = 0,
+       FW_DOMAIN_ID_BLITTER,
+       FW_DOMAIN_ID_MEDIA,
+
+       FW_DOMAIN_ID_COUNT
+};
+
+enum forcewake_domains {
+       FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
+       FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
+       FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA),
+       FORCEWAKE_ALL = (FORCEWAKE_RENDER |
+                        FORCEWAKE_BLITTER |
+                        FORCEWAKE_MEDIA)
+};
+
+struct intel_uncore_funcs {
+       void (*force_wake_get)(struct drm_i915_private *dev_priv,
+                              enum forcewake_domains domains);
+       void (*force_wake_put)(struct drm_i915_private *dev_priv,
+                              enum forcewake_domains domains);
+
+       uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv,
+                              i915_reg_t r, bool trace);
+       uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
+                              i915_reg_t r, bool trace);
+       uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
+                              i915_reg_t r, bool trace);
+       uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
+                              i915_reg_t r, bool trace);
+
+       void (*mmio_writeb)(struct drm_i915_private *dev_priv,
+                           i915_reg_t r, uint8_t val, bool trace);
+       void (*mmio_writew)(struct drm_i915_private *dev_priv,
+                           i915_reg_t r, uint16_t val, bool trace);
+       void (*mmio_writel)(struct drm_i915_private *dev_priv,
+                           i915_reg_t r, uint32_t val, bool trace);
+};
+
+struct intel_forcewake_range {
+       u32 start;
+       u32 end;
+
+       enum forcewake_domains domains;
+};
+
+struct intel_uncore {
+       spinlock_t lock; /** lock is also taken in irq contexts. */
+
+       const struct intel_forcewake_range *fw_domains_table;
+       unsigned int fw_domains_table_entries;
+
+       struct notifier_block pmic_bus_access_nb;
+       struct intel_uncore_funcs funcs;
+
+       unsigned int fifo_count;
+
+       enum forcewake_domains fw_domains;
+       enum forcewake_domains fw_domains_active;
+
+       u32 fw_set;
+       u32 fw_clear;
+       u32 fw_reset;
+
+       struct intel_uncore_forcewake_domain {
+               enum forcewake_domain_id id;
+               enum forcewake_domains mask;
+               unsigned int wake_count;
+               bool active;
+               struct hrtimer timer;
+               i915_reg_t reg_set;
+               i915_reg_t reg_ack;
+       } fw_domain[FW_DOMAIN_ID_COUNT];
+
+       int unclaimed_mmio_check;
+};
+
+/* Iterate over initialised fw domains */
+#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
+       for (tmp__ = (mask__); \
+            tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
+
+#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
+       for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)
+
+
+void intel_uncore_sanitize(struct drm_i915_private *dev_priv);
+void intel_uncore_init(struct drm_i915_private *dev_priv);
+bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv);
+bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv);
+void intel_uncore_fini(struct drm_i915_private *dev_priv);
+void intel_uncore_suspend(struct drm_i915_private *dev_priv);
+void intel_uncore_resume_early(struct drm_i915_private *dev_priv);
+
+u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
+void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
+const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id);
+
+enum forcewake_domains
+intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
+                              i915_reg_t reg, unsigned int op);
+#define FW_REG_READ  (1)
+#define FW_REG_WRITE (2)
+
+void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
+                               enum forcewake_domains domains);
+void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
+                               enum forcewake_domains domains);
+/* Like above but the caller must manage the uncore.lock itself.
+ * Must be used with I915_READ_FW and friends.
+ */
+void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
+                                       enum forcewake_domains domains);
+void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
+                                       enum forcewake_domains domains);
+
+int intel_wait_for_register(struct drm_i915_private *dev_priv,
+                           i915_reg_t reg,
+                           u32 mask,
+                           u32 value,
+                           unsigned int timeout_ms);
+int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
+                                i915_reg_t reg,
+                                u32 mask,
+                                u32 value,
+                                unsigned int fast_timeout_us,
+                                unsigned int slow_timeout_ms,
+                                u32 *out_value);
+static inline
+int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
+                              i915_reg_t reg,
+                              u32 mask,
+                              u32 value,
+                              unsigned int timeout_ms)
+{
+       return __intel_wait_for_register_fw(dev_priv, reg, mask, value,
+                                           2, timeout_ms, NULL);
+}
+
+#endif /* !__INTEL_UNCORE_H__ */
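A minimal sketch of the __locked contract noted above, mirroring intel_wait_for_register(): the caller takes uncore.lock, only uses the raw _FW accessors such as I915_READ_FW, and releases everything in reverse order. example_reg is a hypothetical placeholder; this is illustrative only and not part of the patch.

/*
 * Illustrative sketch: read a register with forcewake taken under uncore.lock,
 * as required when using the __locked variants together with I915_READ_FW.
 */
static u32 example_read_locked(struct drm_i915_private *dev_priv,
                               i915_reg_t example_reg)
{
        enum forcewake_domains fw =
                intel_uncore_forcewake_for_reg(dev_priv, example_reg, FW_REG_READ);
        u32 val;

        spin_lock_irq(&dev_priv->uncore.lock);
        intel_uncore_forcewake_get__locked(dev_priv, fw);

        val = I915_READ_FW(example_reg);

        intel_uncore_forcewake_put__locked(dev_priv, fw);
        spin_unlock_irq(&dev_priv->uncore.lock);

        return val;
}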
index f08d0179b3df2e5485a77d4abcf67f1dfd8e967c..95d4aebc01817a7de3a0314679f23274fccc9650 100644 (file)
@@ -138,10 +138,7 @@ static int wc_set(struct drm_i915_gem_object *obj,
        typeof(v) *map;
        int err;
 
-       /* XXX GTT write followed by WC write go missing */
-       i915_gem_object_flush_gtt_write_domain(obj);
-
-       err = i915_gem_object_set_to_gtt_domain(obj, true);
+       err = i915_gem_object_set_to_wc_domain(obj, true);
        if (err)
                return err;
 
@@ -162,10 +159,7 @@ static int wc_get(struct drm_i915_gem_object *obj,
        typeof(v) map;
        int err;
 
-       /* XXX WC write followed by GTT write go missing */
-       i915_gem_object_flush_gtt_write_domain(obj);
-
-       err = i915_gem_object_set_to_gtt_domain(obj, false);
+       err = i915_gem_object_set_to_wc_domain(obj, false);
        if (err)
                return err;
 
index 1afb8b06e3e19bf23ed287277415afb364504b23..12b85b3278cd1cfc53b159253e9152e3d8f1784b 100644 (file)
@@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       struct drm_i915_gem_object *obj;
+       struct drm_i915_gem_object *obj = NULL;
        struct drm_file *file;
        IGT_TIMEOUT(end_time);
        LIST_HEAD(objects);
@@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg)
                }
 
                for_each_engine(engine, i915, id) {
-                       if (dw == 0) {
+                       if (!obj) {
                                obj = create_test_object(ctx, file, &objects);
                                if (IS_ERR(obj)) {
                                        err = PTR_ERR(obj);
@@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg)
                                goto out_unlock;
                        }
 
-                       if (++dw == max_dwords(obj))
+                       if (++dw == max_dwords(obj)) {
+                               obj = NULL;
                                dw = 0;
+                       }
                        ndwords++;
                }
                ncontexts++;
index 817bef74bbcbc437c96485e2da07b83f81cd6765..d15cc9d3a5cd1960f55a74ac0424c3e77f503f25 100644 (file)
@@ -271,6 +271,105 @@ err_obj:
        return err;
 }
 
+static int igt_dmabuf_export_kmap(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct drm_i915_gem_object *obj;
+       struct dma_buf *dmabuf;
+       void *ptr;
+       int err;
+
+       obj = i915_gem_object_create(i915, 2*PAGE_SIZE);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+       i915_gem_object_put(obj);
+       if (IS_ERR(dmabuf)) {
+               err = PTR_ERR(dmabuf);
+               pr_err("i915_gem_prime_export failed with err=%d\n", err);
+               return err;
+       }
+
+       ptr = dma_buf_kmap(dmabuf, 0);
+       if (!ptr) {
+               pr_err("dma_buf_kmap failed\n");
+               err = -ENOMEM;
+               goto err;
+       }
+
+       if (memchr_inv(ptr, 0, PAGE_SIZE)) {
+               dma_buf_kunmap(dmabuf, 0, ptr);
+               pr_err("Exported page[0] not initialised to zero!\n");
+               err = -EINVAL;
+               goto err;
+       }
+
+       memset(ptr, 0xc5, PAGE_SIZE);
+       dma_buf_kunmap(dmabuf, 0, ptr);
+
+       ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+       if (IS_ERR(ptr)) {
+               err = PTR_ERR(ptr);
+               pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
+               goto err;
+       }
+       memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE);
+       i915_gem_object_unpin_map(obj);
+
+       ptr = dma_buf_kmap(dmabuf, 1);
+       if (!ptr) {
+               pr_err("dma_buf_kmap failed\n");
+               err = -ENOMEM;
+               goto err;
+       }
+
+       if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) {
+               dma_buf_kunmap(dmabuf, 1, ptr);
+               pr_err("Exported page[1] not set to 0xaa!\n");
+               err = -EINVAL;
+               goto err;
+       }
+
+       memset(ptr, 0xc5, PAGE_SIZE);
+       dma_buf_kunmap(dmabuf, 1, ptr);
+
+       ptr = dma_buf_kmap(dmabuf, 0);
+       if (!ptr) {
+               pr_err("dma_buf_kmap failed\n");
+               err = -ENOMEM;
+               goto err;
+       }
+       if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) {
+               dma_buf_kunmap(dmabuf, 0, ptr);
+               pr_err("Exported page[0] did not retain 0xc5!\n");
+               err = -EINVAL;
+               goto err;
+       }
+       dma_buf_kunmap(dmabuf, 0, ptr);
+
+       ptr = dma_buf_kmap(dmabuf, 2);
+       if (ptr) {
+               pr_err("Erroneously kmapped beyond the end of the object!\n");
+               dma_buf_kunmap(dmabuf, 2, ptr);
+               err = -EINVAL;
+               goto err;
+       }
+
+       ptr = dma_buf_kmap(dmabuf, -1);
+       if (ptr) {
+               pr_err("Erroneously kmapped before the start of the object!\n");
+               dma_buf_kunmap(dmabuf, -1, ptr);
+               err = -EINVAL;
+               goto err;
+       }
+
+       err = 0;
+err:
+       dma_buf_put(dmabuf);
+       return err;
+}
+
 int i915_gem_dmabuf_mock_selftests(void)
 {
        static const struct i915_subtest tests[] = {
@@ -279,6 +378,7 @@ int i915_gem_dmabuf_mock_selftests(void)
                SUBTEST(igt_dmabuf_import),
                SUBTEST(igt_dmabuf_import_ownership),
                SUBTEST(igt_dmabuf_export_vmap),
+               SUBTEST(igt_dmabuf_export_kmap),
        };
        struct drm_i915_private *i915;
        int err;
index 67d82bf1407f0043ea3121a6235a0a6fc5570f9f..8f011c447e4103464253d1cd1490065703f01bfb 100644 (file)
@@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
                if (offset >= obj->base.size)
                        continue;
 
-               i915_gem_object_flush_gtt_write_domain(obj);
+               flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
                p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
                cpu = kmap(p) + offset_in_page(offset);
@@ -545,7 +545,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
                }
 
                mutex_lock(&i915->drm.struct_mutex);
+               intel_runtime_pm_get(i915);
                err = make_obj_busy(obj);
+               intel_runtime_pm_put(i915);
                mutex_unlock(&i915->drm.struct_mutex);
                if (err) {
                        pr_err("[loop %d] Failed to busy the object\n", loop);
index 98b7aac41eec7e526c9e3ac26ee95a72db09bc01..6664cb2eb0b8dd3cd12152b361004c19416f426a 100644 (file)
@@ -580,7 +580,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
        if (err)
                goto err;
 
-       err = i915_gem_object_set_to_gtt_domain(obj, true);
+       err = i915_gem_object_set_to_wc_domain(obj, true);
        if (err)
                goto err;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
new file mode 100644 (file)
index 0000000..7a44dab
--- /dev/null
@@ -0,0 +1,299 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+#include "mock_gem_device.h"
+#include "mock_timeline.h"
+
+struct __igt_sync {
+       const char *name;
+       u32 seqno;
+       bool expected;
+       bool set;
+};
+
+static int __igt_sync(struct intel_timeline *tl,
+                     u64 ctx,
+                     const struct __igt_sync *p,
+                     const char *name)
+{
+       int ret;
+
+       if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
+               pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
+                      name, p->name, ctx, p->seqno, yesno(p->expected));
+               return -EINVAL;
+       }
+
+       if (p->set) {
+               ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int igt_sync(void *arg)
+{
+       const struct __igt_sync pass[] = {
+               { "unset", 0, false, false },
+               { "new", 0, false, true },
+               { "0a", 0, true, true },
+               { "1a", 1, false, true },
+               { "1b", 1, true, true },
+               { "0b", 0, true, false },
+               { "2a", 2, false, true },
+               { "4", 4, false, true },
+               { "INT_MAX", INT_MAX, false, true },
+               { "INT_MAX-1", INT_MAX-1, true, false },
+               { "INT_MAX+1", (u32)INT_MAX+1, false, true },
+               { "INT_MAX", INT_MAX, true, false },
+               { "UINT_MAX", UINT_MAX, false, true },
+               { "wrap", 0, false, true },
+               { "unwrap", UINT_MAX, true, false },
+               {},
+       }, *p;
+       struct intel_timeline *tl;
+       int order, offset;
+       int ret;
+
+       tl = mock_timeline(0);
+       if (!tl)
+               return -ENOMEM;
+
+       for (p = pass; p->name; p++) {
+               for (order = 1; order < 64; order++) {
+                       for (offset = -1; offset <= (order > 1); offset++) {
+                               u64 ctx = BIT_ULL(order) + offset;
+
+                               ret = __igt_sync(tl, ctx, p, "1");
+                               if (ret)
+                                       goto out;
+                       }
+               }
+       }
+       mock_timeline_destroy(tl);
+
+       tl = mock_timeline(0);
+       if (!tl)
+               return -ENOMEM;
+
+       for (order = 1; order < 64; order++) {
+               for (offset = -1; offset <= (order > 1); offset++) {
+                       u64 ctx = BIT_ULL(order) + offset;
+
+                       for (p = pass; p->name; p++) {
+                               ret = __igt_sync(tl, ctx, p, "2");
+                               if (ret)
+                                       goto out;
+                       }
+               }
+       }
+
+out:
+       mock_timeline_destroy(tl);
+       return ret;
+}
+
+static unsigned int random_engine(struct rnd_state *rnd)
+{
+       return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32;
+}
+
+static int bench_sync(void *arg)
+{
+       struct rnd_state prng;
+       struct intel_timeline *tl;
+       unsigned long end_time, count;
+       u64 prng32_1M;
+       ktime_t kt;
+       int order, last_order;
+
+       tl = mock_timeline(0);
+       if (!tl)
+               return -ENOMEM;
+
+       /* Lookups from the cache are very fast, so the random number generation
+        * and the loop itself become a significant factor in the per-iteration
+        * timings. We compensate by measuring the overhead of the prng and
+        * subtracting it from the reported results.
+        */
+       prandom_seed_state(&prng, i915_selftest.random_seed);
+       count = 0;
+       kt = ktime_get();
+       end_time = jiffies + HZ/10;
+       do {
+               u32 x;
+
+               /* Make sure the compiler doesn't optimise away the prng call */
+               WRITE_ONCE(x, prandom_u32_state(&prng));
+
+               count++;
+       } while (!time_after(jiffies, end_time));
+       kt = ktime_sub(ktime_get(), kt);
+       pr_debug("%s: %lu random evaluations, %lluns/prng\n",
+                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+       prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
+
+       /* Benchmark (only) setting random context ids */
+       prandom_seed_state(&prng, i915_selftest.random_seed);
+       count = 0;
+       kt = ktime_get();
+       end_time = jiffies + HZ/10;
+       do {
+               u64 id = i915_prandom_u64_state(&prng);
+
+               __intel_timeline_sync_set(tl, id, 0);
+               count++;
+       } while (!time_after(jiffies, end_time));
+       kt = ktime_sub(ktime_get(), kt);
+       kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+       pr_info("%s: %lu random insertions, %lluns/insert\n",
+               __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+       /* Benchmark looking up the exact same context ids as we just set */
+       prandom_seed_state(&prng, i915_selftest.random_seed);
+       end_time = count;
+       kt = ktime_get();
+       while (end_time--) {
+               u64 id = i915_prandom_u64_state(&prng);
+
+               if (!__intel_timeline_sync_is_later(tl, id, 0)) {
+                       mock_timeline_destroy(tl);
+                       pr_err("Lookup of %llu failed\n", id);
+                       return -EINVAL;
+               }
+       }
+       kt = ktime_sub(ktime_get(), kt);
+       kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+       pr_info("%s: %lu random lookups, %lluns/lookup\n",
+               __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+       mock_timeline_destroy(tl);
+       cond_resched();
+
+       tl = mock_timeline(0);
+       if (!tl)
+               return -ENOMEM;
+
+       /* Benchmark setting the first N (in order) contexts */
+       count = 0;
+       kt = ktime_get();
+       end_time = jiffies + HZ/10;
+       do {
+               __intel_timeline_sync_set(tl, count++, 0);
+       } while (!time_after(jiffies, end_time));
+       kt = ktime_sub(ktime_get(), kt);
+       pr_info("%s: %lu in-order insertions, %lluns/insert\n",
+               __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+       /* Benchmark looking up the exact same context ids as we just set */
+       end_time = count;
+       kt = ktime_get();
+       while (end_time--) {
+               if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
+                       pr_err("Lookup of %lu failed\n", end_time);
+                       mock_timeline_destroy(tl);
+                       return -EINVAL;
+               }
+       }
+       kt = ktime_sub(ktime_get(), kt);
+       pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
+               __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+       mock_timeline_destroy(tl);
+       cond_resched();
+
+       tl = mock_timeline(0);
+       if (!tl)
+               return -ENOMEM;
+
+       /* Benchmark searching for a random context id and maybe changing it */
+       prandom_seed_state(&prng, i915_selftest.random_seed);
+       count = 0;
+       kt = ktime_get();
+       end_time = jiffies + HZ/10;
+       do {
+               u32 id = random_engine(&prng);
+               u32 seqno = prandom_u32_state(&prng);
+
+               if (!__intel_timeline_sync_is_later(tl, id, seqno))
+                       __intel_timeline_sync_set(tl, id, seqno);
+
+               count++;
+       } while (!time_after(jiffies, end_time));
+       kt = ktime_sub(ktime_get(), kt);
+       kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+       pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
+               __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+       mock_timeline_destroy(tl);
+       cond_resched();
+
+       /* Benchmark searching for a known context id and changing the seqno */
+       for (last_order = 1, order = 1; order < 32;
+            ({ int tmp = last_order; last_order = order; order += tmp; })) {
+               unsigned int mask = BIT(order) - 1;
+
+               tl = mock_timeline(0);
+               if (!tl)
+                       return -ENOMEM;
+
+               count = 0;
+               kt = ktime_get();
+               end_time = jiffies + HZ/10;
+               do {
+                       /* Without assuming too many details of the underlying
+                        * implementation, try to identify its phase-changes
+                        * (if any)!
+                        */
+                       u64 id = (u64)(count & mask) << order;
+
+                       __intel_timeline_sync_is_later(tl, id, 0);
+                       __intel_timeline_sync_set(tl, id, 0);
+
+                       count++;
+               } while (!time_after(jiffies, end_time));
+               kt = ktime_sub(ktime_get(), kt);
+               pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
+                       __func__, count, order,
+                       (long long)div64_ul(ktime_to_ns(kt), count));
+               mock_timeline_destroy(tl);
+               cond_resched();
+       }
+
+       return 0;
+}
+
+int i915_gem_timeline_mock_selftests(void)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(igt_sync),
+               SUBTEST(bench_sync),
+       };
+
+       return i915_subtests(tests, NULL);
+}
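The overhead correction in bench_sync() above is plain fixed-point arithmetic: prng32_1M holds the measured cost of one prandom_u32_state() call in nanoseconds scaled by 2^20, and each benchmarked iteration draws two 32-bit values to build its 64-bit id, so the time to subtract reduces to a multiply and a shift. A standalone restatement of that calculation (illustrative only):

/*
 * prng32_1M       = (calibration_ns << 20) / calibration_count
 *                   (cost of one prandom_u32_state() call, in ns * 2^20)
 * overhead(count) = (count * prng32_1M * 2) >> 20
 *                   (two prng calls per benchmarked iteration, back in ns)
 */
static u64 example_prng_overhead_ns(u64 prng32_1M, unsigned long count)
{
        return (count * prng32_1M * 2) >> 20;
}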
index be9a9ebf5692d8ed9b0ae67137fbbb4165f2d45e..fc74687501ba923c97f5afe14a13deba7ee6c893 100644 (file)
@@ -9,9 +9,12 @@
  * Tests are executed in order by igt/drv_selftest
  */
 selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
+selftest(fence, i915_sw_fence_mock_selftests)
 selftest(scatterlist, scatterlist_mock_selftests)
+selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
 selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
+selftest(timelines, i915_gem_timeline_mock_selftests)
 selftest(requests, i915_gem_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
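As the hunk above shows, each mock suite is registered with a single selftest(name, entrypoint) line; the entry point gathers its subtests with SUBTEST() and runs them through i915_subtests(). A hypothetical example of such an entry point (every name below is illustrative, not part of the patch):

static int igt_example_sanity(void *arg)
{
        return 0; /* report success */
}

int i915_example_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_example_sanity),
        };

        return i915_subtests(tests, NULL);
}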
index c17c83c3063784d7da1780435534b8241b801baf..d044bf9a6feb37e6036b44e83a0a70dc1b7f03cb 100644 (file)
 
 #include "i915_random.h"
 
+u64 i915_prandom_u64_state(struct rnd_state *rnd)
+{
+       u64 x;
+
+       x = prandom_u32_state(rnd);
+       x <<= 32;
+       x |= prandom_u32_state(rnd);
+
+       return x;
+}
+
 static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
 {
        return upper_32_bits((u64)prandom_u32_state(state) * ep_ro);
index b9c334ce6cd9499c07d02ba7388cac86c92f3a92..6c9379871384949f603216359ec511cbd2685b31 100644 (file)
@@ -41,6 +41,8 @@
 #define I915_RND_SUBSTATE(name__, parent__) \
        struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__)))
 
+u64 i915_prandom_u64_state(struct rnd_state *rnd);
+
 unsigned int *i915_random_order(unsigned int count,
                                struct rnd_state *state);
 void i915_random_reorder(unsigned int *order,
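A small usage sketch for the new 64-bit helper (illustrative only): a selftest declares a local generator with I915_RND_STATE(), seeded for the test run, and draws 64-bit ids from it, as the timeline and syncmap tests below do.

static u64 example_random_context_id(void)
{
        I915_RND_STATE(prng); /* local struct rnd_state seeded for this run */

        return i915_prandom_u64_state(&prng);
}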
diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
new file mode 100644 (file)
index 0000000..19d145d
--- /dev/null
@@ -0,0 +1,582 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+
+#include "../i915_selftest.h"
+
+static int __i915_sw_fence_call
+fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+       switch (state) {
+       case FENCE_COMPLETE:
+               break;
+
+       case FENCE_FREE:
+               /* Leave the fence for the caller to free it after testing */
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static struct i915_sw_fence *alloc_fence(void)
+{
+       struct i915_sw_fence *fence;
+
+       fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+       if (!fence)
+               return NULL;
+
+       i915_sw_fence_init(fence, fence_notify);
+       return fence;
+}
+
+static void free_fence(struct i915_sw_fence *fence)
+{
+       i915_sw_fence_fini(fence);
+       kfree(fence);
+}
+
+static int __test_self(struct i915_sw_fence *fence)
+{
+       if (i915_sw_fence_done(fence))
+               return -EINVAL;
+
+       i915_sw_fence_commit(fence);
+       if (!i915_sw_fence_done(fence))
+               return -EINVAL;
+
+       i915_sw_fence_wait(fence);
+       if (!i915_sw_fence_done(fence))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int test_self(void *arg)
+{
+       struct i915_sw_fence *fence;
+       int ret;
+
+       /* Test i915_sw_fence signaling and completion */
+       fence = alloc_fence();
+       if (!fence)
+               return -ENOMEM;
+
+       ret = __test_self(fence);
+
+       free_fence(fence);
+       return ret;
+}
+
+static int test_dag(void *arg)
+{
+       struct i915_sw_fence *A, *B, *C;
+       int ret = -EINVAL;
+
+       /* Test detection of cycles within the i915_sw_fence graphs */
+       if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
+               return 0;
+
+       A = alloc_fence();
+       if (!A)
+               return -ENOMEM;
+
+       if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) {
+               pr_err("recursive cycle not detected (AA)\n");
+               goto err_A;
+       }
+
+       B = alloc_fence();
+       if (!B) {
+               ret = -ENOMEM;
+               goto err_A;
+       }
+
+       i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+       if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) {
+               pr_err("single depth cycle not detected (BAB)\n");
+               goto err_B;
+       }
+
+       C = alloc_fence();
+       if (!C) {
+               ret = -ENOMEM;
+               goto err_B;
+       }
+
+       if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) {
+               pr_err("invalid cycle detected\n");
+               goto err_C;
+       }
+       if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) {
+               pr_err("single depth cycle not detected (CBC)\n");
+               goto err_C;
+       }
+       if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) {
+               pr_err("cycle not detected (BA, CB, AC)\n");
+               goto err_C;
+       }
+       if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) {
+               pr_err("invalid cycle detected\n");
+               goto err_C;
+       }
+
+       i915_sw_fence_commit(A);
+       i915_sw_fence_commit(B);
+       i915_sw_fence_commit(C);
+
+       ret = 0;
+       if (!i915_sw_fence_done(C)) {
+               pr_err("fence C not done\n");
+               ret = -EINVAL;
+       }
+       if (!i915_sw_fence_done(B)) {
+               pr_err("fence B not done\n");
+               ret = -EINVAL;
+       }
+       if (!i915_sw_fence_done(A)) {
+               pr_err("fence A not done\n");
+               ret = -EINVAL;
+       }
+err_C:
+       free_fence(C);
+err_B:
+       free_fence(B);
+err_A:
+       free_fence(A);
+       return ret;
+}
+
+static int test_AB(void *arg)
+{
+       struct i915_sw_fence *A, *B;
+       int ret;
+
+       /* Test i915_sw_fence (A) waiting on an event source (B) */
+       A = alloc_fence();
+       if (!A)
+               return -ENOMEM;
+       B = alloc_fence();
+       if (!B) {
+               ret = -ENOMEM;
+               goto err_A;
+       }
+
+       ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+       if (ret < 0)
+               goto err_B;
+       if (ret == 0) {
+               pr_err("Incorrectly reported fence A was complete before await\n");
+               ret = -EINVAL;
+               goto err_B;
+       }
+
+       ret = -EINVAL;
+       i915_sw_fence_commit(A);
+       if (i915_sw_fence_done(A))
+               goto err_B;
+
+       i915_sw_fence_commit(B);
+       if (!i915_sw_fence_done(B)) {
+               pr_err("Fence B is not done\n");
+               goto err_B;
+       }
+
+       if (!i915_sw_fence_done(A)) {
+               pr_err("Fence A is not done\n");
+               goto err_B;
+       }
+
+       ret = 0;
+err_B:
+       free_fence(B);
+err_A:
+       free_fence(A);
+       return ret;
+}
+
+static int test_ABC(void *arg)
+{
+       struct i915_sw_fence *A, *B, *C;
+       int ret;
+
+       /* Test a chain of fences, A waits on B who waits on C */
+       A = alloc_fence();
+       if (!A)
+               return -ENOMEM;
+
+       B = alloc_fence();
+       if (!B) {
+               ret = -ENOMEM;
+               goto err_A;
+       }
+
+       C = alloc_fence();
+       if (!C) {
+               ret = -ENOMEM;
+               goto err_B;
+       }
+
+       ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+       if (ret < 0)
+               goto err_C;
+       if (ret == 0) {
+               pr_err("Incorrectly reported fence B was complete before await\n");
+               goto err_C;
+       }
+
+       ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+       if (ret < 0)
+               goto err_C;
+       if (ret == 0) {
+               pr_err("Incorrectly reported fence C was complete before await\n");
+               goto err_C;
+       }
+
+       ret = -EINVAL;
+       i915_sw_fence_commit(A);
+       if (i915_sw_fence_done(A)) {
+               pr_err("Fence A completed early\n");
+               goto err_C;
+       }
+
+       i915_sw_fence_commit(B);
+       if (i915_sw_fence_done(B)) {
+               pr_err("Fence B completed early\n");
+               goto err_C;
+       }
+
+       if (i915_sw_fence_done(A)) {
+               pr_err("Fence A completed early (after signaling B)\n");
+               goto err_C;
+       }
+
+       i915_sw_fence_commit(C);
+
+       ret = 0;
+       if (!i915_sw_fence_done(C)) {
+               pr_err("Fence C not done\n");
+               ret = -EINVAL;
+       }
+       if (!i915_sw_fence_done(B)) {
+               pr_err("Fence B not done\n");
+               ret = -EINVAL;
+       }
+       if (!i915_sw_fence_done(A)) {
+               pr_err("Fence A not done\n");
+               ret = -EINVAL;
+       }
+err_C:
+       free_fence(C);
+err_B:
+       free_fence(B);
+err_A:
+       free_fence(A);
+       return ret;
+}
+
+static int test_AB_C(void *arg)
+{
+       struct i915_sw_fence *A, *B, *C;
+       int ret = -EINVAL;
+
+       /* Test multiple fences (AB) waiting on a single event (C) */
+       A = alloc_fence();
+       if (!A)
+               return -ENOMEM;
+
+       B = alloc_fence();
+       if (!B) {
+               ret = -ENOMEM;
+               goto err_A;
+       }
+
+       C = alloc_fence();
+       if (!C) {
+               ret = -ENOMEM;
+               goto err_B;
+       }
+
+       ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL);
+       if (ret < 0)
+               goto err_C;
+       if (ret == 0) {
+               ret = -EINVAL;
+               goto err_C;
+       }
+
+       ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+       if (ret < 0)
+               goto err_C;
+       if (ret == 0) {
+               ret = -EINVAL;
+               goto err_C;
+       }
+
+       i915_sw_fence_commit(A);
+       i915_sw_fence_commit(B);
+
+       ret = 0;
+       if (i915_sw_fence_done(A)) {
+               pr_err("Fence A completed early\n");
+               ret = -EINVAL;
+       }
+
+       if (i915_sw_fence_done(B)) {
+               pr_err("Fence B completed early\n");
+               ret = -EINVAL;
+       }
+
+       i915_sw_fence_commit(C);
+       if (!i915_sw_fence_done(C)) {
+               pr_err("Fence C not done\n");
+               ret = -EINVAL;
+       }
+
+       if (!i915_sw_fence_done(B)) {
+               pr_err("Fence B not done\n");
+               ret = -EINVAL;
+       }
+
+       if (!i915_sw_fence_done(A)) {
+               pr_err("Fence A not done\n");
+               ret = -EINVAL;
+       }
+
+err_C:
+       free_fence(C);
+err_B:
+       free_fence(B);
+err_A:
+       free_fence(A);
+       return ret;
+}
+
+static int test_C_AB(void *arg)
+{
+       struct i915_sw_fence *A, *B, *C;
+       int ret;
+
+       /* Test multiple event sources (A,B) for a single fence (C) */
+       A = alloc_fence();
+       if (!A)
+               return -ENOMEM;
+
+       B = alloc_fence();
+       if (!B) {
+               ret = -ENOMEM;
+               goto err_A;
+       }
+
+       C = alloc_fence();
+       if (!C) {
+               ret = -ENOMEM;
+               goto err_B;
+       }
+
+       ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL);
+       if (ret < 0)
+               goto err_C;
+       if (ret == 0) {
+               ret = -EINVAL;
+               goto err_C;
+       }
+
+       ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL);
+       if (ret < 0)
+               goto err_C;
+       if (ret == 0) {
+               ret = -EINVAL;
+               goto err_C;
+       }
+
+       ret = 0;
+       i915_sw_fence_commit(C);
+       if (i915_sw_fence_done(C))
+               ret = -EINVAL;
+
+       i915_sw_fence_commit(A);
+       i915_sw_fence_commit(B);
+
+       if (!i915_sw_fence_done(A)) {
+               pr_err("Fence A not done\n");
+               ret = -EINVAL;
+       }
+
+       if (!i915_sw_fence_done(B)) {
+               pr_err("Fence B not done\n");
+               ret = -EINVAL;
+       }
+
+       if (!i915_sw_fence_done(C)) {
+               pr_err("Fence C not done\n");
+               ret = -EINVAL;
+       }
+
+err_C:
+       free_fence(C);
+err_B:
+       free_fence(B);
+err_A:
+       free_fence(A);
+       return ret;
+}
+
+static int test_chain(void *arg)
+{
+       int nfences = 4096;
+       struct i915_sw_fence **fences;
+       int ret, i;
+
+       /* Test a long chain of fences */
+       fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL);
+       if (!fences)
+               return -ENOMEM;
+
+       for (i = 0; i < nfences; i++) {
+               fences[i] = alloc_fence();
+               if (!fences[i]) {
+                       nfences = i;
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               if (i > 0) {
+                       ret = i915_sw_fence_await_sw_fence_gfp(fences[i],
+                                                              fences[i - 1],
+                                                              GFP_KERNEL);
+                       if (ret < 0) {
+                               nfences = i + 1;
+                               goto err;
+                       }
+
+                       i915_sw_fence_commit(fences[i]);
+               }
+       }
+
+       ret = 0;
+       for (i = nfences; --i; ) {
+               if (i915_sw_fence_done(fences[i])) {
+                       if (ret == 0)
+                               pr_err("Fence[%d] completed early\n", i);
+                       ret = -EINVAL;
+               }
+       }
+       i915_sw_fence_commit(fences[0]);
+       for (i = 0; ret == 0 && i < nfences; i++) {
+               if (!i915_sw_fence_done(fences[i])) {
+                       pr_err("Fence[%d] is not done\n", i);
+                       ret = -EINVAL;
+               }
+       }
+
+err:
+       for (i = 0; i < nfences; i++)
+               free_fence(fences[i]);
+       kfree(fences);
+       return ret;
+}
+
+struct task_ipc {
+       struct work_struct work;
+       struct completion started;
+       struct i915_sw_fence *in, *out;
+       int value;
+};
+
+static void task_ipc(struct work_struct *work)
+{
+       struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
+
+       complete(&ipc->started);
+
+       i915_sw_fence_wait(ipc->in);
+       smp_store_mb(ipc->value, 1);
+       i915_sw_fence_commit(ipc->out);
+}
+
+static int test_ipc(void *arg)
+{
+       struct task_ipc ipc;
+       int ret = 0;
+
+       /* Test use of i915_sw_fence as an interprocess signaling mechanism */
+       ipc.in = alloc_fence();
+       if (!ipc.in)
+               return -ENOMEM;
+       ipc.out = alloc_fence();
+       if (!ipc.out) {
+               ret = -ENOMEM;
+               goto err_in;
+       }
+
+       /* use a completion to avoid chicken-and-egg testing */
+       init_completion(&ipc.started);
+
+       ipc.value = 0;
+       INIT_WORK_ONSTACK(&ipc.work, task_ipc);
+       schedule_work(&ipc.work);
+
+       wait_for_completion(&ipc.started);
+
+       usleep_range(1000, 2000);
+       if (READ_ONCE(ipc.value)) {
+               pr_err("worker updated value before i915_sw_fence was signaled\n");
+               ret = -EINVAL;
+       }
+
+       i915_sw_fence_commit(ipc.in);
+       i915_sw_fence_wait(ipc.out);
+
+       if (!READ_ONCE(ipc.value)) {
+               pr_err("worker signaled i915_sw_fence before value was posted\n");
+               ret = -EINVAL;
+       }
+
+       flush_work(&ipc.work);
+       destroy_work_on_stack(&ipc.work);
+       free_fence(ipc.out);
+err_in:
+       free_fence(ipc.in);
+       return ret;
+}
+
+int i915_sw_fence_mock_selftests(void)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(test_self),
+               SUBTEST(test_dag),
+               SUBTEST(test_AB),
+               SUBTEST(test_ABC),
+               SUBTEST(test_AB_C),
+               SUBTEST(test_C_AB),
+               SUBTEST(test_chain),
+               SUBTEST(test_ipc),
+       };
+
+       return i915_subtests(tests, NULL);
+}
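These tests rely on the return convention of i915_sw_fence_await_sw_fence_gfp(): a negative value is an error, 0 means the awaited fence had already signaled (so nothing was queued), and a positive value means a wait was installed. A minimal chaining sketch under that assumption (illustrative only, not part of the patch):

static int example_chain_fences(struct i915_sw_fence *waiter,
                                struct i915_sw_fence *signaler)
{
        int ret;

        ret = i915_sw_fence_await_sw_fence_gfp(waiter, signaler, GFP_KERNEL);
        if (ret < 0)
                return ret; /* e.g. allocation failure or a detected cycle */

        i915_sw_fence_commit(waiter); /* completes only once signaler is done */

        return 0;
}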
diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
new file mode 100644 (file)
index 0000000..bcab3d0
--- /dev/null
@@ -0,0 +1,616 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+static char *
+__sync_print(struct i915_syncmap *p,
+            char *buf, unsigned long *sz,
+            unsigned int depth,
+            unsigned int last,
+            unsigned int idx)
+{
+       unsigned long len;
+       unsigned int i, X;
+
+       if (depth) {
+               unsigned int d;
+
+               for (d = 0; d < depth - 1; d++) {
+                       if (last & BIT(depth - d - 1))
+                               len = scnprintf(buf, *sz, "|   ");
+                       else
+                               len = scnprintf(buf, *sz, "    ");
+                       buf += len;
+                       *sz -= len;
+               }
+               len = scnprintf(buf, *sz, "%x-> ", idx);
+               buf += len;
+               *sz -= len;
+       }
+
+       /* We mark bits after the prefix as "X" */
+       len = scnprintf(buf, *sz, "0x%016llx", p->prefix << p->height << SHIFT);
+       buf += len;
+       *sz -= len;
+       X = (p->height + SHIFT) / 4;
+       scnprintf(buf - X, *sz + X, "%*s", X, "XXXXXXXXXXXXXXXXX");
+
+       if (!p->height) {
+               for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
+                       len = scnprintf(buf, *sz, " %x:%x,",
+                                       i, __sync_seqno(p)[i]);
+                       buf += len;
+                       *sz -= len;
+               }
+               buf -= 1;
+               *sz += 1;
+       }
+
+       len = scnprintf(buf, *sz, "\n");
+       buf += len;
+       *sz -= len;
+
+       if (p->height) {
+               for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
+                       buf = __sync_print(__sync_child(p)[i], buf, sz,
+                                          depth + 1,
+                                          last << 1 | !!(p->bitmap >> (i + 1)),
+                                          i);
+               }
+       }
+
+       return buf;
+}
+
+static bool
+i915_syncmap_print_to_buf(struct i915_syncmap *p, char *buf, unsigned long sz)
+{
+       if (!p)
+               return false;
+
+       while (p->parent)
+               p = p->parent;
+
+       __sync_print(p, buf, &sz, 0, 1, 0);
+       return true;
+}
+
+static int check_syncmap_free(struct i915_syncmap **sync)
+{
+       i915_syncmap_free(sync);
+       if (*sync) {
+               pr_err("sync not cleared after free\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int dump_syncmap(struct i915_syncmap *sync, int err)
+{
+       char *buf;
+
+       if (!err)
+               return check_syncmap_free(&sync);
+
+       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!buf)
+               goto skip;
+
+       if (i915_syncmap_print_to_buf(sync, buf, PAGE_SIZE))
+               pr_err("%s", buf);
+
+       kfree(buf);
+
+skip:
+       i915_syncmap_free(&sync);
+       return err;
+}
+
+static int igt_syncmap_init(void *arg)
+{
+       struct i915_syncmap *sync = (void *)~0ul;
+
+       /*
+        * Cursory check that we can initialise a random pointer and transform
+        * it into the root pointer of a syncmap.
+        */
+
+       i915_syncmap_init(&sync);
+       return check_syncmap_free(&sync);
+}
+
+static int check_seqno(struct i915_syncmap *leaf, unsigned int idx, u32 seqno)
+{
+       if (leaf->height) {
+               pr_err("%s: not a leaf, height is %d\n",
+                      __func__, leaf->height);
+               return -EINVAL;
+       }
+
+       if (__sync_seqno(leaf)[idx] != seqno) {
+               pr_err("%s: seqno[%d], found %x, expected %x\n",
+                      __func__, idx, __sync_seqno(leaf)[idx], seqno);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int check_one(struct i915_syncmap **sync, u64 context, u32 seqno)
+{
+       int err;
+
+       err = i915_syncmap_set(sync, context, seqno);
+       if (err)
+               return err;
+
+       if ((*sync)->height) {
+               pr_err("Inserting first context=%llx did not return leaf (height=%d, prefix=%llx)\n",
+                      context, (*sync)->height, (*sync)->prefix);
+               return -EINVAL;
+       }
+
+       if ((*sync)->parent) {
+               pr_err("Inserting first context=%llx created branches!\n",
+                      context);
+               return -EINVAL;
+       }
+
+       if (hweight32((*sync)->bitmap) != 1) {
+               pr_err("First bitmap does not contain a single entry, found %x (count=%d)!\n",
+                      (*sync)->bitmap, hweight32((*sync)->bitmap));
+               return -EINVAL;
+       }
+
+       err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno);
+       if (err)
+               return err;
+
+       if (!i915_syncmap_is_later(sync, context, seqno)) {
+               pr_err("Lookup of first context=%llx/seqno=%x failed!\n",
+                      context, seqno);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int igt_syncmap_one(void *arg)
+{
+       I915_RND_STATE(prng);
+       IGT_TIMEOUT(end_time);
+       struct i915_syncmap *sync;
+       unsigned long max = 1;
+       int err;
+
+       /*
+        * Check that inserting a new id creates a leaf and only that leaf.
+        */
+
+       i915_syncmap_init(&sync);
+
+       do {
+               u64 context = i915_prandom_u64_state(&prng);
+               unsigned long loop;
+
+               err = check_syncmap_free(&sync);
+               if (err)
+                       goto out;
+
+               for (loop = 0; loop <= max; loop++) {
+                       err = check_one(&sync, context,
+                                       prandom_u32_state(&prng));
+                       if (err)
+                               goto out;
+               }
+               max++;
+       } while (!__igt_timeout(end_time, NULL));
+       pr_debug("%s: Completed %lu single insertions\n",
+                __func__, max * (max - 1) / 2);
+out:
+       return dump_syncmap(sync, err);
+}
+
+static int check_leaf(struct i915_syncmap **sync, u64 context, u32 seqno)
+{
+       int err;
+
+       err = i915_syncmap_set(sync, context, seqno);
+       if (err)
+               return err;
+
+       if ((*sync)->height) {
+               pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx)\n",
+                      context, (*sync)->height, (*sync)->prefix);
+               return -EINVAL;
+       }
+
+       if (hweight32((*sync)->bitmap) != 1) {
+               pr_err("First entry into leaf (context=%llx) does not contain a single entry, found %x (count=%d)!\n",
+                      context, (*sync)->bitmap, hweight32((*sync)->bitmap));
+               return -EINVAL;
+       }
+
+       err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno);
+       if (err)
+               return err;
+
+       if (!i915_syncmap_is_later(sync, context, seqno)) {
+               pr_err("Lookup of first entry context=%llx/seqno=%x failed!\n",
+                      context, seqno);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int igt_syncmap_join_above(void *arg)
+{
+       struct i915_syncmap *sync;
+       unsigned int pass, order;
+       int err;
+
+       i915_syncmap_init(&sync);
+
+       /*
+        * When we have a new id that doesn't fit inside the existing tree,
+        * we need to add a new layer above.
+        *
+        * 1: 0x00000001
+        * 2: 0x00000010
+        * 3: 0x00000100
+        * 4: 0x00001000
+        * ...
+        * Each pass the common prefix shrinks and we have to insert a join.
+        * Each join will only contain two branches, the latest of which
+        * is always a leaf.
+        *
+        * If we then reuse the same set of contexts, we expect to build an
+        * identical tree.
+        */
+       for (pass = 0; pass < 3; pass++) {
+               for (order = 0; order < 64; order += SHIFT) {
+                       u64 context = BIT_ULL(order);
+                       struct i915_syncmap *join;
+
+                       err = check_leaf(&sync, context, 0);
+                       if (err)
+                               goto out;
+
+                       join = sync->parent;
+                       if (!join) /* very first insert will have no parents */
+                               continue;
+
+                       if (!join->height) {
+                               pr_err("Parent with no height!\n");
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       if (hweight32(join->bitmap) != 2) {
+                               pr_err("Join does not have 2 children: %x (%d)\n",
+                                      join->bitmap, hweight32(join->bitmap));
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       if (__sync_child(join)[__sync_branch_idx(join, context)] != sync) {
+                               pr_err("Leaf misplaced in parent!\n");
+                               err = -EINVAL;
+                               goto out;
+                       }
+               }
+       }
+out:
+       return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_join_below(void *arg)
+{
+       struct i915_syncmap *sync;
+       unsigned int step, order, idx;
+       int err;
+
+       i915_syncmap_init(&sync);
+
+       /*
+        * Check that we can split a compacted branch by replacing it with
+        * a join.
+        */
+       for (step = 0; step < KSYNCMAP; step++) {
+               for (order = 64 - SHIFT; order > 0; order -= SHIFT) {
+                       u64 context = step * BIT_ULL(order);
+
+                       err = i915_syncmap_set(&sync, context, 0);
+                       if (err)
+                               goto out;
+
+                       if (sync->height) {
+                               pr_err("Inserting context=%llx (order=%d, step=%d) did not return leaf (height=%d, prefix=%llx)\n",
+                                      context, order, step, sync->height, sync->prefix);
+                               err = -EINVAL;
+                               goto out;
+                       }
+               }
+       }
+
+       for (step = 0; step < KSYNCMAP; step++) {
+               for (order = SHIFT; order < 64; order += SHIFT) {
+                       u64 context = step * BIT_ULL(order);
+
+                       if (!i915_syncmap_is_later(&sync, context, 0)) {
+                               pr_err("1: context %llx (order=%d, step=%d) not found\n",
+                                      context, order, step);
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       for (idx = 1; idx < KSYNCMAP; idx++) {
+                               if (i915_syncmap_is_later(&sync, context + idx, 0)) {
+                                       pr_err("1: context %llx (order=%d, step=%d) should not exist\n",
+                                              context + idx, order, step);
+                                       err = -EINVAL;
+                                       goto out;
+                               }
+                       }
+               }
+       }
+
+       for (order = SHIFT; order < 64; order += SHIFT) {
+               for (step = 0; step < KSYNCMAP; step++) {
+                       u64 context = step * BIT_ULL(order);
+
+                       if (!i915_syncmap_is_later(&sync, context, 0)) {
+                               pr_err("2: context %llx (order=%d, step=%d) not found\n",
+                                      context, order, step);
+                               err = -EINVAL;
+                               goto out;
+                       }
+               }
+       }
+
+out:
+       return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_neighbours(void *arg)
+{
+       I915_RND_STATE(prng);
+       IGT_TIMEOUT(end_time);
+       struct i915_syncmap *sync;
+       int err;
+
+       /*
+        * Each leaf holds KSYNCMAP seqnos. Check that when we create KSYNCMAP
+        * neighbouring ids, they all fit into the same leaf.
+        */
+
+       i915_syncmap_init(&sync);
+       do {
+               u64 context = i915_prandom_u64_state(&prng) & ~MASK;
+               unsigned int idx;
+
+               if (i915_syncmap_is_later(&sync, context, 0)) /* Skip repeats */
+                       continue;
+
+               for (idx = 0; idx < KSYNCMAP; idx++) {
+                       err = i915_syncmap_set(&sync, context + idx, 0);
+                       if (err)
+                               goto out;
+
+                       if (sync->height) {
+                               pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx)\n",
+                                      context, sync->height, sync->prefix);
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       if (sync->bitmap != BIT(idx + 1) - 1) {
+                               pr_err("Inserting neighbouring context=0x%llx+%d did not fit into the same leaf; bitmap=%x (%d), expected %lx (%d)\n",
+                                      context, idx,
+                                      sync->bitmap, hweight32(sync->bitmap),
+                                      BIT(idx + 1) - 1, idx + 1);
+                               err = -EINVAL;
+                               goto out;
+                       }
+               }
+       } while (!__igt_timeout(end_time, NULL));
+out:
+       return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_compact(void *arg)
+{
+       struct i915_syncmap *sync;
+       unsigned int idx, order;
+       int err;
+
+       i915_syncmap_init(&sync);
+
+       /*
+        * The syncmap is a "space efficient" compressed radix tree - any
+        * branch with only one child is skipped and replaced by the child.
+        *
+        * If we construct a tree with ids that are neighbouring at a non-zero
+        * height, we form a join but each child of that join is directly a
+        * leaf holding the single id.
+        */
+       for (order = SHIFT; order < 64; order += SHIFT) {
+               err = check_syncmap_free(&sync);
+               if (err)
+                       goto out;
+
+               /* Create neighbours in the parent */
+               for (idx = 0; idx < KSYNCMAP; idx++) {
+                       u64 context = idx * BIT_ULL(order) + idx;
+
+                       err = i915_syncmap_set(&sync, context, 0);
+                       if (err)
+                               goto out;
+
+                       if (sync->height) {
+                               pr_err("Inserting context=%llx (order=%d, idx=%d) did not return leaf (height=%d, prefix=%llx)\n",
+                                      context, order, idx,
+                                      sync->height, sync->prefix);
+                               err = -EINVAL;
+                               goto out;
+                       }
+               }
+
+               sync = sync->parent;
+               if (sync->parent) {
+                       pr_err("Parent (join) of last leaf was not the sync!\n");
+                       err = -EINVAL;
+                       goto out;
+               }
+
+               if (sync->height != order) {
+                       pr_err("Join does not have the expected height, found %d, expected %d\n",
+                              sync->height, order);
+                       err = -EINVAL;
+                       goto out;
+               }
+
+               if (sync->bitmap != BIT(KSYNCMAP) - 1) {
+                       pr_err("Join is not full, found %x (%d), expected %lx (%d)\n",
+                              sync->bitmap, hweight32(sync->bitmap),
+                              BIT(KSYNCMAP) - 1, KSYNCMAP);
+                       err = -EINVAL;
+                       goto out;
+               }
+
+               /* Each of our children should be a leaf */
+               for (idx = 0; idx < KSYNCMAP; idx++) {
+                       struct i915_syncmap *leaf = __sync_child(sync)[idx];
+
+                       if (leaf->height) {
+                               pr_err("Child %d is not a leaf!\n", idx);
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       if (leaf->parent != sync) {
+                               pr_err("Child %d is not attached to us!\n",
+                                      idx);
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       if (!is_power_of_2(leaf->bitmap)) {
+                               pr_err("Child %d holds more than one id, found %x (%d)\n",
+                                      idx, leaf->bitmap, hweight32(leaf->bitmap));
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       if (leaf->bitmap != BIT(idx)) {
+                               pr_err("Child %d has wrong seqno idx, found %d, expected %d\n",
+                                      idx, ilog2(leaf->bitmap), idx);
+                               err = -EINVAL;
+                               goto out;
+                       }
+               }
+       }
+out:
+       return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_random(void *arg)
+{
+       I915_RND_STATE(prng);
+       IGT_TIMEOUT(end_time);
+       struct i915_syncmap *sync;
+       unsigned long count, phase, i;
+       u32 seqno;
+       int err;
+
+       i915_syncmap_init(&sync);
+
+       /*
+        * Having tried to test the individual operations within i915_syncmap,
+        * run a smoketest exploring the entire u64 space with random
+        * insertions.
+        */
+
+       count = 0;
+       phase = jiffies + HZ/100 + 1;
+       do {
+               u64 context = i915_prandom_u64_state(&prng);
+
+               err = i915_syncmap_set(&sync, context, 0);
+               if (err)
+                       goto out;
+
+               count++;
+       } while (!time_after(jiffies, phase));
+       seqno = 0;
+
+       phase = 0;
+       do {
+               I915_RND_STATE(ctx);
+               u32 last_seqno = seqno;
+               bool expect;
+
+               seqno = prandom_u32_state(&prng);
+               expect = seqno_later(last_seqno, seqno);
+
+               for (i = 0; i < count; i++) {
+                       u64 context = i915_prandom_u64_state(&ctx);
+
+                       if (i915_syncmap_is_later(&sync, context, seqno) != expect) {
+                               pr_err("context=%llu, last=%u, this=%u did not match expectation (%d)\n",
+                                      context, last_seqno, seqno, expect);
+                               err = -EINVAL;
+                               goto out;
+                       }
+
+                       err = i915_syncmap_set(&sync, context, seqno);
+                       if (err)
+                               goto out;
+               }
+
+               phase++;
+       } while (!__igt_timeout(end_time, NULL));
+       pr_debug("Completed %lu passes, each of %lu contexts\n", phase, count);
+out:
+       return dump_syncmap(sync, err);
+}
+
+int i915_syncmap_mock_selftests(void)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(igt_syncmap_init),
+               SUBTEST(igt_syncmap_one),
+               SUBTEST(igt_syncmap_join_above),
+               SUBTEST(igt_syncmap_join_below),
+               SUBTEST(igt_syncmap_neighbours),
+               SUBTEST(igt_syncmap_compact),
+               SUBTEST(igt_syncmap_random),
+       };
+
+       return i915_subtests(tests, NULL);
+}
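
The selftests above lean on how a context id is decomposed inside the syncmap: the bottom SHIFT bits index a slot in a leaf's bitmap, the remaining bits form the leaf prefix, and a join at height h routes lookups by the SHIFT bits just above h. Below is a minimal standalone sketch of that arithmetic, assuming the 16-way fan-out (KSYNCMAP == 16, SHIFT == 4); the helpers and values are illustrative only, not the kernel's i915_syncmap implementation.

/*
 * Standalone model of the id split used by the selftests, assuming a
 * 16-way tree (KSYNCMAP == 16, SHIFT == 4). Illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define KSYNCMAP 16u			/* assumed fan-out per level */
#define SHIFT    4u			/* log2(KSYNCMAP) */
#define MASK     (KSYNCMAP - 1)

int main(void)
{
	uint64_t context = 0x1234567f;

	/* The low SHIFT bits pick the slot in a leaf's bitmap ... */
	unsigned int slot = context & MASK;
	/* ... and everything above them is the leaf prefix, so the ids
	 * prefix..prefix+15 all share one leaf, which is what
	 * igt_syncmap_neighbours checks. */
	uint64_t prefix = context >> SHIFT;

	/* A join at height h routes a lookup by the SHIFT bits above h,
	 * the same arithmetic igt_syncmap_compact pokes at through
	 * __sync_branch_idx(). */
	unsigned int height = 20;
	unsigned int branch = (context >> height) & MASK;

	printf("context %#llx: prefix %#llx, leaf slot %u, branch@%u = %u\n",
	       (unsigned long long)context, (unsigned long long)prefix,
	       slot, height, branch);
	return 0;
}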
index 0ad624a1db90a0315a19fc1a973d49f7ccfe68b9..5b18a2dc19a87a9fd98fae33a5a8b00fb2eb3cdc 100644 (file)
@@ -52,11 +52,12 @@ static void hw_delay_complete(unsigned long data)
        spin_unlock(&engine->hw_lock);
 }
 
-static int mock_context_pin(struct intel_engine_cs *engine,
-                           struct i915_gem_context *ctx)
+static struct intel_ring *
+mock_context_pin(struct intel_engine_cs *engine,
+                struct i915_gem_context *ctx)
 {
        i915_gem_context_get(ctx);
-       return 0;
+       return engine->buffer;
 }
 
 static void mock_context_unpin(struct intel_engine_cs *engine,
@@ -72,7 +73,6 @@ static int mock_request_alloc(struct drm_i915_gem_request *request)
        INIT_LIST_HEAD(&mock->link);
        mock->delay = 0;
 
-       request->ring = request->engine->buffer;
        return 0;
 }
 
@@ -112,7 +112,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
        if (!ring)
                return NULL;
 
-       ring->engine = engine;
        ring->size = sz;
        ring->effective_size = sz;
        ring->vaddr = (void *)(ring + 1);
@@ -141,7 +140,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 
        /* minimal engine setup for requests */
        engine->base.i915 = i915;
-       engine->base.name = name;
+       snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
        engine->base.id = id++;
        engine->base.status_page.page_addr = (void *)(engine + 1);
 
index 9f24c5da3f8d3c62b32343e870564f73a740cfd2..627e2aa097665f7667f2cf6a834112dd28ac1a3a 100644 (file)
@@ -30,6 +30,7 @@
 #include "mock_gem_device.h"
 #include "mock_gem_object.h"
 #include "mock_gtt.h"
+#include "mock_uncore.h"
 
 void mock_device_flush(struct drm_i915_private *i915)
 {
@@ -73,6 +74,7 @@ static void mock_device_release(struct drm_device *dev)
 
        destroy_workqueue(i915->wq);
 
+       kmem_cache_destroy(i915->priorities);
        kmem_cache_destroy(i915->dependencies);
        kmem_cache_destroy(i915->requests);
        kmem_cache_destroy(i915->vmas);
@@ -119,6 +121,7 @@ struct drm_i915_private *mock_gem_device(void)
                goto err;
 
        device_initialize(&pdev->dev);
+       pdev->class = PCI_BASE_CLASS_DISPLAY << 16;
        pdev->dev.release = release_dev;
        dev_set_name(&pdev->dev, "mock");
        dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
@@ -143,6 +146,7 @@ struct drm_i915_private *mock_gem_device(void)
        mkwrite_device_info(i915)->gen = -1;
 
        spin_lock_init(&i915->mm.object_stat_lock);
+       mock_uncore_init(i915);
 
        init_waitqueue_head(&i915->gpu_error.wait_queue);
        init_waitqueue_head(&i915->gpu_error.reset_queue);
@@ -184,12 +188,16 @@ struct drm_i915_private *mock_gem_device(void)
        if (!i915->dependencies)
                goto err_requests;
 
+       i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+       if (!i915->priorities)
+               goto err_dependencies;
+
        mutex_lock(&i915->drm.struct_mutex);
        INIT_LIST_HEAD(&i915->gt.timelines);
        err = i915_gem_timeline_init__global(i915);
        if (err) {
                mutex_unlock(&i915->drm.struct_mutex);
-               goto err_dependencies;
+               goto err_priorities;
        }
 
        mock_init_ggtt(i915);
@@ -209,6 +217,8 @@ struct drm_i915_private *mock_gem_device(void)
 err_engine:
        for_each_engine(engine, i915, id)
                mock_engine_free(engine);
+err_priorities:
+       kmem_cache_destroy(i915->priorities);
 err_dependencies:
        kmem_cache_destroy(i915->dependencies);
 err_requests:
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
new file mode 100644 (file)
index 0000000..47b1f47
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "mock_timeline.h"
+
+struct intel_timeline *mock_timeline(u64 context)
+{
+       static struct lock_class_key class;
+       struct intel_timeline *tl;
+
+       tl = kzalloc(sizeof(*tl), GFP_KERNEL);
+       if (!tl)
+               return NULL;
+
+       __intel_timeline_init(tl, NULL, context, &class, "mock");
+
+       return tl;
+}
+
+void mock_timeline_destroy(struct intel_timeline *tl)
+{
+       __intel_timeline_fini(tl);
+       kfree(tl);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h
new file mode 100644 (file)
index 0000000..c27ff46
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MOCK_TIMELINE__
+#define __MOCK_TIMELINE__
+
+#include "../i915_gem_timeline.h"
+
+struct intel_timeline *mock_timeline(u64 context);
+void mock_timeline_destroy(struct intel_timeline *tl);
+
+#endif /* !__MOCK_TIMELINE__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c
new file mode 100644 (file)
index 0000000..8ef14c7
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "mock_uncore.h"
+
+#define __nop_write(x) \
+static void \
+nop_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { }
+__nop_write(8)
+__nop_write(16)
+__nop_write(32)
+
+#define __nop_read(x) \
+static u##x \
+nop_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { return 0; }
+__nop_read(8)
+__nop_read(16)
+__nop_read(32)
+__nop_read(64)
+
+void mock_uncore_init(struct drm_i915_private *i915)
+{
+       ASSIGN_WRITE_MMIO_VFUNCS(i915, nop);
+       ASSIGN_READ_MMIO_VFUNCS(i915, nop);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h
new file mode 100644 (file)
index 0000000..d79aa3c
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MOCK_UNCORE_H
+#define __MOCK_UNCORE_H
+
+void mock_uncore_init(struct drm_i915_private *i915);
+
+#endif /* !__MOCK_UNCORE_H */
index f7007e544f29b5558ebf055e424a55f6ab965854..20eb5ca285942653dfa362374eee8fa36c05b1c8 100644 (file)
 #define DP_EDP_PWMGEN_BIT_COUNT             0x724
 #define DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN     0x725
 #define DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX     0x726
+# define DP_EDP_PWMGEN_BIT_COUNT_MASK       (0x1f << 0)
 
 #define DP_EDP_BACKLIGHT_CONTROL_STATUS     0x727
 
 #define DP_EDP_BACKLIGHT_FREQ_SET           0x728
+# define DP_EDP_BACKLIGHT_FREQ_BASE_KHZ     27000
 
 #define DP_EDP_BACKLIGHT_FREQ_CAP_MIN_MSB   0x72a
 #define DP_EDP_BACKLIGHT_FREQ_CAP_MIN_MID   0x72b
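
DP_EDP_BACKLIGHT_FREQ_BASE_KHZ names the 27 MHz base from which the eDP AUX backlight PWM frequency is derived; the value programmed into DP_EDP_BACKLIGHT_FREQ_SET effectively divides that base. The sketch below shows how a driver might turn a requested PWM frequency into a total divisor, deliberately ignoring how that divisor is split against the PWMGEN bit count; this is an illustrative assumption, not the i915 implementation.

/*
 * Illustrative only: derive a total divisor of the 27 MHz base from a
 * requested PWM frequency. Range clamping and the coupling with
 * DP_EDP_PWMGEN_BIT_COUNT that a real driver needs are omitted.
 */
#include <linux/kernel.h>
#include <drm/drm_dp_helper.h>

static u32 edp_backlight_freq_divider(u32 pwm_freq_hz)
{
	if (!pwm_freq_hz)
		return 0;

	return DIV_ROUND_CLOSEST(DP_EDP_BACKLIGHT_FREQ_BASE_KHZ * 1000,
				 pwm_freq_hz);
}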
index e9892b4c3af10c103cbe27f9ee17d1e31c0ba5ac..b6121c8fe5399637e117f7c2aa262b31a5d4d62a 100644 (file)
@@ -31,20 +31,20 @@ struct platform_device;
 
 #define HDMI_MAX_ELD_BYTES     128
 
-struct intel_hdmi_lpe_audio_eld {
-       int port_id;
-       int pipe_id;
-       unsigned char eld_data[HDMI_MAX_ELD_BYTES];
+struct intel_hdmi_lpe_audio_port_pdata {
+       u8 eld[HDMI_MAX_ELD_BYTES];
+       int port;
+       int pipe;
+       int ls_clock;
+       bool dp_output;
 };
 
 struct intel_hdmi_lpe_audio_pdata {
-       bool notify_pending;
-       int tmds_clock_speed;
-       bool hdmi_connected;
-       bool dp_output;
-       int link_rate;
-       struct intel_hdmi_lpe_audio_eld eld;
-       void (*notify_audio_lpe)(struct platform_device *pdev);
+       struct intel_hdmi_lpe_audio_port_pdata port[3]; /* for ports B,C,D */
+       int num_ports;
+       int num_pipes;
+
+       void (*notify_audio_lpe)(struct platform_device *pdev, int port); /* port: 0==B,1==C,2==D */
        spinlock_t lpe_audio_slock;
 };
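
The reworked pdata above moves all plug state into intel_hdmi_lpe_audio_port_pdata, with pipe < 0 meaning "unplugged" and ls_clock carrying either the TMDS clock or the DP link rate depending on dp_output. The following is a sketch of what the i915-side producer is expected to do, inferred from this layout rather than taken from the patch; the function name and argument list are hypothetical.

/*
 * Hypothetical producer side (the i915 LPE audio bridge): fill in the
 * per-port data and kick the registered callback under the slock.
 */
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <drm/intel_lpe_audio.h>

static void lpe_audio_notify_example(struct platform_device *pdev, int port,
				     int pipe, int ls_clock, bool dp_output,
				     const u8 *eld)
{
	struct intel_hdmi_lpe_audio_pdata *pdata = pdev->dev.platform_data;
	struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[port];
	unsigned long flags;

	spin_lock_irqsave(&pdata->lpe_audio_slock, flags);

	ppdata->pipe = pipe;		/* pipe < 0 signals hot-unplug */
	ppdata->ls_clock = ls_clock;	/* TMDS clock, or DP link rate if dp_output */
	ppdata->dp_output = dp_output;
	if (eld)
		memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES);
	else
		memset(ppdata->eld, 0, HDMI_MAX_ELD_BYTES);

	if (pdata->notify_audio_lpe)
		pdata->notify_audio_lpe(pdev, port);	/* port: 0==B, 1==C, 2==D */

	spin_unlock_irqrestore(&pdata->lpe_audio_slock, flags);
}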
 
index 3554495bef13b9f4c9ff452e521a234803d91e7b..f24a80d2d42e0ebe9cb5d033706521430325b318 100644 (file)
@@ -412,6 +412,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_FENCE       44
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
+ * user-specified buffers for post-mortem debugging of GPU hangs. See
+ * EXEC_OBJECT_CAPTURE.
+ */
+#define I915_PARAM_HAS_EXEC_CAPTURE     45
+
 typedef struct drm_i915_getparam {
        __s32 param;
        /*
@@ -666,6 +672,8 @@ struct drm_i915_gem_relocation_entry {
 #define I915_GEM_DOMAIN_VERTEX         0x00000020
 /** GTT domain - aperture and scanout */
 #define I915_GEM_DOMAIN_GTT            0x00000040
+/** WC domain - uncached access */
+#define I915_GEM_DOMAIN_WC             0x00000080
 /** @} */
 
 struct drm_i915_gem_exec_object {
@@ -773,8 +781,15 @@ struct drm_i915_gem_exec_object2 {
  * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
  */
 #define EXEC_OBJECT_ASYNC              (1<<6)
+/* Request that the contents of this execobject be copied into the error
+ * state upon a GPU hang involving this batch for post-mortem debugging.
+ * These buffers are recorded in no particular order as "user" in
+ * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see
+ * if the kernel supports this flag.
+ */
+#define EXEC_OBJECT_CAPTURE            (1<<7)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1)
        __u64 flags;
 
        union {
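
Taken together, the new getparam and execobject flag let userspace ask for specific buffers to be preserved in the error state after a hang. Below is a userspace-side sketch of probing and using them; it is not part of the patch, error handling and the rest of the execbuf setup are omitted, and fd is assumed to be an already-open DRM file descriptor.

/*
 * Illustrative userspace snippet: probe I915_PARAM_HAS_EXEC_CAPTURE and,
 * if present, mark a batch's objects for capture on a GPU hang.
 */
#include <stdbool.h>
#include <xf86drm.h>
#include <i915_drm.h>

static bool has_exec_capture(int fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_EXEC_CAPTURE,
		.value = &value,
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && value;
}

static void mark_for_capture(struct drm_i915_gem_exec_object2 *obj,
			     unsigned int count)
{
	unsigned int i;

	/* Captured contents appear as "user" buffers in
	 * /sys/class/drm/cardN/error after a hang. */
	for (i = 0; i < count; i++)
		obj[i].flags |= EXEC_OBJECT_CAPTURE;
}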
index 664b7fe206d65457cf3e7b1486a05b7e92493cb8..c19efc9708d7f2fb30e0ac64f4ee34c0023e2186 100644 (file)
 #include <drm/intel_lpe_audio.h>
 #include "intel_hdmi_audio.h"
 
+#define for_each_pipe(card_ctx, pipe) \
+       for ((pipe) = 0; (pipe) < (card_ctx)->num_pipes; (pipe)++)
+#define for_each_port(card_ctx, port) \
+       for ((port) = 0; (port) < (card_ctx)->num_ports; (port)++)
+
 /*standard module options for ALSA. This module supports only one card*/
 static int hdmi_card_index = SNDRV_DEFAULT_IDX1;
 static char *hdmi_card_id = SNDRV_DEFAULT_STR1;
@@ -189,15 +194,30 @@ static void had_substream_put(struct snd_intelhad *intelhaddata)
        spin_unlock_irqrestore(&intelhaddata->had_spinlock, flags);
 }
 
+static u32 had_config_offset(int pipe)
+{
+       switch (pipe) {
+       default:
+       case 0:
+               return AUDIO_HDMI_CONFIG_A;
+       case 1:
+               return AUDIO_HDMI_CONFIG_B;
+       case 2:
+               return AUDIO_HDMI_CONFIG_C;
+       }
+}
+
 /* Register access functions */
-static u32 had_read_register_raw(struct snd_intelhad *ctx, u32 reg)
+static u32 had_read_register_raw(struct snd_intelhad_card *card_ctx,
+                                int pipe, u32 reg)
 {
-       return ioread32(ctx->mmio_start + ctx->had_config_offset + reg);
+       return ioread32(card_ctx->mmio_start + had_config_offset(pipe) + reg);
 }
 
-static void had_write_register_raw(struct snd_intelhad *ctx, u32 reg, u32 val)
+static void had_write_register_raw(struct snd_intelhad_card *card_ctx,
+                                  int pipe, u32 reg, u32 val)
 {
-       iowrite32(val, ctx->mmio_start + ctx->had_config_offset + reg);
+       iowrite32(val, card_ctx->mmio_start + had_config_offset(pipe) + reg);
 }
 
 static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val)
@@ -205,13 +225,13 @@ static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val)
        if (!ctx->connected)
                *val = 0;
        else
-               *val = had_read_register_raw(ctx, reg);
+               *val = had_read_register_raw(ctx->card_ctx, ctx->pipe, reg);
 }
 
 static void had_write_register(struct snd_intelhad *ctx, u32 reg, u32 val)
 {
        if (ctx->connected)
-               had_write_register_raw(ctx, reg, val);
+               had_write_register_raw(ctx->card_ctx, ctx->pipe, reg, val);
 }
 
 /*
@@ -1358,6 +1378,9 @@ static void had_process_hot_plug(struct snd_intelhad *intelhaddata)
                return;
        }
 
+       /* Disable Audio */
+       had_enable_audio(intelhaddata, false);
+
        intelhaddata->connected = true;
        dev_dbg(intelhaddata->dev,
                "%s @ %d:DEBUG PLUG/UNPLUG : HAD_DRV_CONNECTED\n",
@@ -1519,22 +1542,32 @@ static const struct snd_kcontrol_new had_controls[] = {
  */
 static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id)
 {
-       struct snd_intelhad *ctx = dev_id;
-       u32 audio_stat;
+       struct snd_intelhad_card *card_ctx = dev_id;
+       u32 audio_stat[3] = {};
+       int pipe, port;
+
+       for_each_pipe(card_ctx, pipe) {
+               /* use raw register access to ack IRQs even while disconnected */
+               audio_stat[pipe] = had_read_register_raw(card_ctx, pipe,
+                                                        AUD_HDMI_STATUS) &
+                       (HDMI_AUDIO_UNDERRUN | HDMI_AUDIO_BUFFER_DONE);
+
+               if (audio_stat[pipe])
+                       had_write_register_raw(card_ctx, pipe,
+                                              AUD_HDMI_STATUS, audio_stat[pipe]);
+       }
 
-       /* use raw register access to ack IRQs even while disconnected */
-       audio_stat = had_read_register_raw(ctx, AUD_HDMI_STATUS);
+       for_each_port(card_ctx, port) {
+               struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+               int pipe = ctx->pipe;
 
-       if (audio_stat & HDMI_AUDIO_UNDERRUN) {
-               had_write_register_raw(ctx, AUD_HDMI_STATUS,
-                                      HDMI_AUDIO_UNDERRUN);
-               had_process_buffer_underrun(ctx);
-       }
+               if (pipe < 0)
+                       continue;
 
-       if (audio_stat & HDMI_AUDIO_BUFFER_DONE) {
-               had_write_register_raw(ctx, AUD_HDMI_STATUS,
-                                      HDMI_AUDIO_BUFFER_DONE);
-               had_process_buffer_done(ctx);
+               if (audio_stat[pipe] & HDMI_AUDIO_BUFFER_DONE)
+                       had_process_buffer_done(ctx);
+               if (audio_stat[pipe] & HDMI_AUDIO_UNDERRUN)
+                       had_process_buffer_underrun(ctx);
        }
 
        return IRQ_HANDLED;
@@ -1543,9 +1576,10 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id)
 /*
  * monitor plug/unplug notification from i915; just kick off the work
  */
-static void notify_audio_lpe(struct platform_device *pdev)
+static void notify_audio_lpe(struct platform_device *pdev, int port)
 {
-       struct snd_intelhad *ctx = platform_get_drvdata(pdev);
+       struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev);
+       struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
 
        schedule_work(&ctx->hdmi_audio_wq);
 }
@@ -1556,47 +1590,51 @@ static void had_audio_wq(struct work_struct *work)
        struct snd_intelhad *ctx =
                container_of(work, struct snd_intelhad, hdmi_audio_wq);
        struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data;
+       struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[ctx->port];
 
        pm_runtime_get_sync(ctx->dev);
        mutex_lock(&ctx->mutex);
-       if (!pdata->hdmi_connected) {
-               dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG\n",
-                       __func__);
+       if (ppdata->pipe < 0) {
+               dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG : port = %d\n",
+                       __func__, ctx->port);
+
                memset(ctx->eld, 0, sizeof(ctx->eld)); /* clear the old ELD */
+
+               ctx->dp_output = false;
+               ctx->tmds_clock_speed = 0;
+               ctx->link_rate = 0;
+
+               /* Shut down the stream */
                had_process_hot_unplug(ctx);
-       } else {
-               struct intel_hdmi_lpe_audio_eld *eld = &pdata->eld;
 
+               ctx->pipe = -1;
+       } else {
                dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n",
-                       __func__, eld->port_id, pdata->tmds_clock_speed);
+                       __func__, ctx->port, ppdata->ls_clock);
 
-               switch (eld->pipe_id) {
-               case 0:
-                       ctx->had_config_offset = AUDIO_HDMI_CONFIG_A;
-                       break;
-               case 1:
-                       ctx->had_config_offset = AUDIO_HDMI_CONFIG_B;
-                       break;
-               case 2:
-                       ctx->had_config_offset = AUDIO_HDMI_CONFIG_C;
-                       break;
-               default:
-                       dev_dbg(ctx->dev, "Invalid pipe %d\n",
-                               eld->pipe_id);
-                       break;
-               }
-
-               memcpy(ctx->eld, eld->eld_data, sizeof(ctx->eld));
+               memcpy(ctx->eld, ppdata->eld, sizeof(ctx->eld));
 
-               ctx->dp_output = pdata->dp_output;
-               ctx->tmds_clock_speed = pdata->tmds_clock_speed;
-               ctx->link_rate = pdata->link_rate;
+               ctx->dp_output = ppdata->dp_output;
+               if (ctx->dp_output) {
+                       ctx->tmds_clock_speed = 0;
+                       ctx->link_rate = ppdata->ls_clock;
+               } else {
+                       ctx->tmds_clock_speed = ppdata->ls_clock;
+                       ctx->link_rate = 0;
+               }
 
+               /*
+                * Shut down the stream before we change
+                * the pipe assignment for this pcm device
+                */
                had_process_hot_plug(ctx);
 
-               /* Process mode change if stream is active */
+               ctx->pipe = ppdata->pipe;
+
+               /* Restart the stream if necessary */
                had_process_mode_change(ctx);
        }
+
        mutex_unlock(&ctx->mutex);
        pm_runtime_mark_last_busy(ctx->dev);
        pm_runtime_put_autosuspend(ctx->dev);
@@ -1605,11 +1643,17 @@ static void had_audio_wq(struct work_struct *work)
 /*
  * Jack interface
  */
-static int had_create_jack(struct snd_intelhad *ctx)
+static int had_create_jack(struct snd_intelhad *ctx,
+                          struct snd_pcm *pcm)
 {
+       char hdmi_str[32];
        int err;
 
-       err = snd_jack_new(ctx->card, "HDMI/DP", SND_JACK_AVOUT, &ctx->jack,
+       snprintf(hdmi_str, sizeof(hdmi_str),
+                "HDMI/DP,pcm=%d", pcm->device);
+
+       err = snd_jack_new(ctx->card_ctx->card, hdmi_str,
+                          SND_JACK_AVOUT, &ctx->jack,
                           true, false);
        if (err < 0)
                return err;
@@ -1623,13 +1667,18 @@ static int had_create_jack(struct snd_intelhad *ctx)
 
 static int hdmi_lpe_audio_runtime_suspend(struct device *dev)
 {
-       struct snd_intelhad *ctx = dev_get_drvdata(dev);
-       struct snd_pcm_substream *substream;
+       struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev);
+       int port;
 
-       substream = had_substream_get(ctx);
-       if (substream) {
-               snd_pcm_suspend(substream);
-               had_substream_put(ctx);
+       for_each_port(card_ctx, port) {
+               struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+               struct snd_pcm_substream *substream;
+
+               substream = had_substream_get(ctx);
+               if (substream) {
+                       snd_pcm_suspend(substream);
+                       had_substream_put(ctx);
+               }
        }
 
        return 0;
@@ -1637,12 +1686,12 @@ static int hdmi_lpe_audio_runtime_suspend(struct device *dev)
 
 static int __maybe_unused hdmi_lpe_audio_suspend(struct device *dev)
 {
-       struct snd_intelhad *ctx = dev_get_drvdata(dev);
+       struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev);
        int err;
 
        err = hdmi_lpe_audio_runtime_suspend(dev);
        if (!err)
-               snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D3hot);
+               snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D3hot);
        return err;
 }
 
@@ -1654,24 +1703,34 @@ static int hdmi_lpe_audio_runtime_resume(struct device *dev)
 
 static int __maybe_unused hdmi_lpe_audio_resume(struct device *dev)
 {
-       struct snd_intelhad *ctx = dev_get_drvdata(dev);
+       struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev);
 
        hdmi_lpe_audio_runtime_resume(dev);
-       snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D0);
+       snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D0);
        return 0;
 }
 
 /* release resources */
 static void hdmi_lpe_audio_free(struct snd_card *card)
 {
-       struct snd_intelhad *ctx = card->private_data;
+       struct snd_intelhad_card *card_ctx = card->private_data;
+       struct intel_hdmi_lpe_audio_pdata *pdata = card_ctx->dev->platform_data;
+       int port;
 
-       cancel_work_sync(&ctx->hdmi_audio_wq);
+       spin_lock_irq(&pdata->lpe_audio_slock);
+       pdata->notify_audio_lpe = NULL;
+       spin_unlock_irq(&pdata->lpe_audio_slock);
 
-       if (ctx->mmio_start)
-               iounmap(ctx->mmio_start);
-       if (ctx->irq >= 0)
-               free_irq(ctx->irq, ctx);
+       for_each_port(card_ctx, port) {
+               struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+
+               cancel_work_sync(&ctx->hdmi_audio_wq);
+       }
+
+       if (card_ctx->mmio_start)
+               iounmap(card_ctx->mmio_start);
+       if (card_ctx->irq >= 0)
+               free_irq(card_ctx->irq, card_ctx);
 }
 
 /*
@@ -1683,12 +1742,12 @@ static void hdmi_lpe_audio_free(struct snd_card *card)
 static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 {
        struct snd_card *card;
-       struct snd_intelhad *ctx;
+       struct snd_intelhad_card *card_ctx;
        struct snd_pcm *pcm;
        struct intel_hdmi_lpe_audio_pdata *pdata;
        int irq;
        struct resource *res_mmio;
-       int i, ret;
+       int port, ret;
 
        pdata = pdev->dev.platform_data;
        if (!pdata) {
@@ -1711,39 +1770,30 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
        /* create a card instance with ALSA framework */
        ret = snd_card_new(&pdev->dev, hdmi_card_index, hdmi_card_id,
-                          THIS_MODULE, sizeof(*ctx), &card);
+                          THIS_MODULE, sizeof(*card_ctx), &card);
        if (ret)
                return ret;
 
-       ctx = card->private_data;
-       spin_lock_init(&ctx->had_spinlock);
-       mutex_init(&ctx->mutex);
-       ctx->connected = false;
-       ctx->dev = &pdev->dev;
-       ctx->card = card;
-       ctx->aes_bits = SNDRV_PCM_DEFAULT_CON_SPDIF;
+       card_ctx = card->private_data;
+       card_ctx->dev = &pdev->dev;
+       card_ctx->card = card;
        strcpy(card->driver, INTEL_HAD);
        strcpy(card->shortname, "Intel HDMI/DP LPE Audio");
        strcpy(card->longname, "Intel HDMI/DP LPE Audio");
 
-       ctx->irq = -1;
-       ctx->tmds_clock_speed = DIS_SAMPLE_RATE_148_5;
-       INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
+       card_ctx->irq = -1;
 
        card->private_free = hdmi_lpe_audio_free;
 
-       /* assume pipe A as default */
-       ctx->had_config_offset = AUDIO_HDMI_CONFIG_A;
-
-       platform_set_drvdata(pdev, ctx);
+       platform_set_drvdata(pdev, card_ctx);
 
        dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n",
                __func__, (unsigned int)res_mmio->start,
                (unsigned int)res_mmio->end);
 
-       ctx->mmio_start = ioremap_nocache(res_mmio->start,
-                                         (size_t)(resource_size(res_mmio)));
-       if (!ctx->mmio_start) {
+       card_ctx->mmio_start = ioremap_nocache(res_mmio->start,
+                                              (size_t)(resource_size(res_mmio)));
+       if (!card_ctx->mmio_start) {
                dev_err(&pdev->dev, "Could not get ioremap\n");
                ret = -EACCES;
                goto err;
@@ -1751,54 +1801,79 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
        /* setup interrupt handler */
        ret = request_irq(irq, display_pipe_interrupt_handler, 0,
-                         pdev->name, ctx);
+                         pdev->name, card_ctx);
        if (ret < 0) {
                dev_err(&pdev->dev, "request_irq failed\n");
                goto err;
        }
 
-       ctx->irq = irq;
-
-       ret = snd_pcm_new(card, INTEL_HAD, PCM_INDEX, MAX_PB_STREAMS,
-                         MAX_CAP_STREAMS, &pcm);
-       if (ret)
-               goto err;
-
-       /* setup private data which can be retrieved when required */
-       pcm->private_data = ctx;
-       pcm->info_flags = 0;
-       strncpy(pcm->name, card->shortname, strlen(card->shortname));
-       /* setup the ops for playabck */
-       snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops);
+       card_ctx->irq = irq;
 
        /* only 32bit addressable */
        dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
        dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
 
-       /* allocate dma pages;
-        * try to allocate 600k buffer as default which is large enough
-        */
-       snd_pcm_lib_preallocate_pages_for_all(pcm,
-                       SNDRV_DMA_TYPE_DEV, NULL,
-                       HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER);
+       init_channel_allocations();
 
-       /* create controls */
-       for (i = 0; i < ARRAY_SIZE(had_controls); i++) {
-               ret = snd_ctl_add(card, snd_ctl_new1(&had_controls[i], ctx));
-               if (ret < 0)
+       card_ctx->num_pipes = pdata->num_pipes;
+       card_ctx->num_ports = pdata->num_ports;
+
+       for_each_port(card_ctx, port) {
+               struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+               int i;
+
+               ctx->card_ctx = card_ctx;
+               ctx->dev = card_ctx->dev;
+               ctx->port = port;
+               ctx->pipe = -1;
+
+               INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
+
+               ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS,
+                                 MAX_CAP_STREAMS, &pcm);
+               if (ret)
                        goto err;
-       }
 
-       init_channel_allocations();
+               /* setup private data which can be retrieved when required */
+               pcm->private_data = ctx;
+               pcm->info_flags = 0;
+               strncpy(pcm->name, card->shortname, strlen(card->shortname));
+               /* setup the ops for playback */
+               snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops);
 
-       /* Register channel map controls */
-       ret = had_register_chmap_ctls(ctx, pcm);
-       if (ret < 0)
-               goto err;
+               /* allocate dma pages;
+                * try to allocate a 600k buffer by default, which is large enough
+                */
+               snd_pcm_lib_preallocate_pages_for_all(pcm,
+                                                     SNDRV_DMA_TYPE_DEV, NULL,
+                                                     HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER);
+
+               /* create controls */
+               for (i = 0; i < ARRAY_SIZE(had_controls); i++) {
+                       struct snd_kcontrol *kctl;
+
+                       kctl = snd_ctl_new1(&had_controls[i], ctx);
+                       if (!kctl) {
+                               ret = -ENOMEM;
+                               goto err;
+                       }
 
-       ret = had_create_jack(ctx);
-       if (ret < 0)
-               goto err;
+                       kctl->id.device = pcm->device;
+
+                       ret = snd_ctl_add(card, kctl);
+                       if (ret < 0)
+                               goto err;
+               }
+
+               /* Register channel map controls */
+               ret = had_register_chmap_ctls(ctx, pcm);
+               if (ret < 0)
+                       goto err;
+
+               ret = had_create_jack(ctx, pcm);
+               if (ret < 0)
+                       goto err;
+       }
 
        ret = snd_card_register(card);
        if (ret)
@@ -1806,19 +1881,18 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
        spin_lock_irq(&pdata->lpe_audio_slock);
        pdata->notify_audio_lpe = notify_audio_lpe;
-       pdata->notify_pending = false;
        spin_unlock_irq(&pdata->lpe_audio_slock);
 
-       /* runtime PM isn't enabled as default, since it won't save much on
-        * BYT/CHT devices; user who want the runtime PM should adjust the
-        * power/ontrol and power/autosuspend_delay_ms sysfs entries instead
-        */
        pm_runtime_use_autosuspend(&pdev->dev);
        pm_runtime_mark_last_busy(&pdev->dev);
        pm_runtime_set_active(&pdev->dev);
 
        dev_dbg(&pdev->dev, "%s: handle pending notification\n", __func__);
-       schedule_work(&ctx->hdmi_audio_wq);
+       for_each_port(card_ctx, port) {
+               struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+
+               schedule_work(&ctx->hdmi_audio_wq);
+       }
 
        return 0;
 
@@ -1834,9 +1908,9 @@ err:
  */
 static int hdmi_lpe_audio_remove(struct platform_device *pdev)
 {
-       struct snd_intelhad *ctx = platform_get_drvdata(pdev);
+       struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev);
 
-       snd_card_free(ctx->card);
+       snd_card_free(card_ctx->card);
        return 0;
 }
 
index 2d3e389f76b317f8711c3827f8a3960b1f6eb96f..0d91bb5dbab7dd51d4804b202bec2ce1c2da0fc5 100644 (file)
@@ -32,7 +32,6 @@
 
 #include "intel_hdmi_lpe_audio.h"
 
-#define PCM_INDEX              0
 #define MAX_PB_STREAMS         1
 #define MAX_CAP_STREAMS                0
 #define BYTES_PER_WORD         0x4
@@ -101,7 +100,7 @@ struct pcm_stream_info {
  * @chmap: holds channel map info
  */
 struct snd_intelhad {
-       struct snd_card *card;
+       struct snd_intelhad_card *card_ctx;
        bool            connected;
        struct          pcm_stream_info stream_info;
        unsigned char   eld[HDMI_MAX_ELD_BYTES];
@@ -112,6 +111,8 @@ struct snd_intelhad {
        struct snd_pcm_chmap *chmap;
        int tmds_clock_speed;
        int link_rate;
+       int port; /* fixed */
+       int pipe; /* can change dynamically */
 
        /* ring buffer (BD) position index */
        unsigned int bd_head;
@@ -123,9 +124,6 @@ struct snd_intelhad {
        unsigned int period_bytes;      /* PCM period size in bytes */
 
        /* internal stuff */
-       int irq;
-       void __iomem *mmio_start;
-       unsigned int had_config_offset;
        union aud_cfg aud_config;       /* AUD_CONFIG reg value cache */
        struct work_struct hdmi_audio_wq;
        struct mutex mutex; /* for protecting chmap and eld */
@@ -133,4 +131,16 @@ struct snd_intelhad {
        struct snd_jack *jack;
 };
 
+struct snd_intelhad_card {
+       struct snd_card *card;
+       struct device *dev;
+
+       /* internal stuff */
+       int irq;
+       void __iomem *mmio_start;
+       int num_pipes;
+       int num_ports;
+       struct snd_intelhad pcm_ctx[3]; /* one for each port */
+};
+
 #endif /* _INTEL_HDMI_AUDIO_ */