From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 14 Sep 2018 16:10:18 +0800 Subject: [PATCH] drm/i915/gvt: Added error interrupt handler for GVT-g guest An draft version of adding error interrupt handler in GVT-g guest, to trigger a GPU reset when receiving the CS error interrupt. v2: Updated by JH - made the worker object local to the engine rather than a global. Also, don't re-initialise it in the interrupt handler otherwise null pointer dereferences ensue when the interrupts occur too quickly. In the Xen virtualised environment, the TDR is a co-operative effort between the host and guest domains. The actual detection is done entirely by the host as only it really knows what the hardware is doing. The guest still needs to do all the recovery processing when informed about a TDR event occuring by the host. However, the guest should not attempt to do the detection itself. Change-Id: I3edac5211f9878725b14abaab7cacf9048ccd620 Signed-off-by: Min He Signed-off-by: John Harrison Signed-off-by: Satyeshwar Singh Reviewed-on: Reviewed-by: Jiang, Fei Reviewed-by: Dong, Eddie Tested-by: Dong, Eddie --- drivers/gpu/drm/i915/i915_debugfs.c | 3 +++ drivers/gpu/drm/i915/i915_irq.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/intel_hangcheck.c | 3 +++ drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 5 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f9ce35d..892a5e7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4000,6 +4000,9 @@ i915_wedged_set(void *data, u64 val) struct intel_engine_cs *engine; unsigned int tmp; + if (intel_vgpu_active(i915)) + return -EINVAL; + /* * There is no safeguard against this debugfs entry colliding * with the hangcheck calling same i915_handle_error() in diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0b16775..71977fb 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1518,6 +1518,12 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet |= USES_GUC_SUBMISSION(engine->i915); } + if ((iir & (GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) && + intel_vgpu_active(engine->i915)) { + queue_work(system_highpri_wq, &engine->reset_work); + return; + } + if (tasklet) tasklet_hi_schedule(&engine->execlists.tasklet); } @@ -4159,6 +4165,19 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) if (HAS_L3_DPF(dev_priv)) gt_interrupts[0] |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + if (intel_vgpu_active(dev_priv)) { + gt_interrupts[0] |= GT_RENDER_CS_MASTER_ERROR_INTERRUPT << + GEN8_RCS_IRQ_SHIFT | + GT_RENDER_CS_MASTER_ERROR_INTERRUPT << + GEN8_BCS_IRQ_SHIFT; + gt_interrupts[1] |= GT_RENDER_CS_MASTER_ERROR_INTERRUPT << + GEN8_VCS1_IRQ_SHIFT | + GT_RENDER_CS_MASTER_ERROR_INTERRUPT << + GEN8_VCS2_IRQ_SHIFT; + gt_interrupts[3] |= GT_RENDER_CS_MASTER_ERROR_INTERRUPT << + GEN8_VECS_IRQ_SHIFT; + } + dev_priv->pm_ier = 0x0; dev_priv->pm_imr = ~dev_priv->pm_ier; GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]); diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 2fc7a0d..1f7da1c 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -418,6 +418,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (!i915_modparams.enable_hangcheck) return; + if (intel_vgpu_active(dev_priv)) + return; + if (!READ_ONCE(dev_priv->gt.awake)) return; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6937ef0..71e4d09 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2386,6 +2386,16 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } +static void i915_error_reset(struct work_struct *work) { + struct intel_engine_cs *engine = + container_of(work, struct intel_engine_cs, + reset_work); + i915_handle_error(engine->i915, 1 << engine->id, + I915_ERROR_CAPTURE, + "Received error interrupt from engine %d", + engine->id); +} + static void logical_ring_setup(struct intel_engine_cs *engine) { @@ -2399,6 +2409,8 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + + INIT_WORK(&engine->reset_work, i915_error_reset); } static bool csb_force_mmio(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f5ffa6d..93daa92 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -567,6 +567,7 @@ struct intel_engine_cs { } semaphore; struct intel_engine_execlists execlists; + struct work_struct reset_work; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the -- https://clearlinux.org