// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"
#include "uc/intel_guc.h"

/*
 * HW architecture suggests a typical invalidation time of 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4
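
/*
 * The _US value is passed as the busy-wait (fast) timeout and the _MS
 * value as the sleeping (slow) timeout of the register waits below.
 */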

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers. Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}
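
/*
 * Invalidate the TLBs on all awake engines via MMIO (gen8+ only): write
 * each engine's tlb_inv.request under forcewake and the uncore lock
 * (serialised against GT reset), then poll for completion with
 * wait_for_invalidate() after dropping the locks.
 */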
static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);
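
	/*
	 * The OA unit has its own TLB, which is not covered by the
	 * per-engine invalidations above, so per Wa_2207587034 it is
	 * flushed explicitly on the affected platforms.
	 */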
	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}
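
/*
 * Full invalidations are tracked with an even/odd seqcount: callers hold
 * an odd cookie (intel_gt_tlb_seqno() | 1, see
 * intel_gt_next_invalidate_tlb_full() in intel_tlb.h), and
 * write_seqcount_invalidate() bumps the count by 2 once a full barrier
 * completes. ALIGN(seqno, 2) rounds the cookie up to the even value that
 * only a completed *full* invalidation can exceed, and the (s32) cast
 * keeps the comparison wrap-safe.
 */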
static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (is_mock_gt(gt))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		struct intel_guc *guc = gt_to_guc(gt);

		mutex_lock(&gt->tlb.invalidate_lock);
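		/*
		 * Re-check under the lock: another caller may have completed
		 * a full invalidation while we waited for the mutex.
		 */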
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
			/*
			 * Only perform GuC TLB invalidation if GuC is ready.
			 * The only time GuC could not be ready is on GT reset,
			 * which would clobber all the TLBs anyways, making
			 * any TLB invalidation path here unnecessary.
			 */
			if (intel_guc_is_ready(guc))
				intel_guc_invalidate_tlb_engines(guc);
		} else {
			mmio_invalidate_full(gt);
		}

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}
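
/*
 * Typical caller pattern (sketch; see vma_invalidate_tlb() in i915_vma.c):
 *
 *	u32 seqno = intel_gt_next_invalidate_tlb_full(gt);
 *	... zap the PTEs ...
 *	intel_gt_invalidate_tlb_full(gt, seqno);
 *
 * If another thread completes a full invalidation after the cookie was
 * taken, tlb_seqno_passed() lets the second call return early.
 */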

void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}

void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif