// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

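/*
 * Allocate the GEM object that backs one page-table level. Under
 * CONFIG_DRM_I915_SELFTEST, fault injection may first run
 * i915_gem_shrink_all() to exercise allocation under memory pressure.
 */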
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	return i915_gem_object_create_internal(vm->i915, sz);
}

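/*
 * Pin the backing pages of a page-table object and mark it unshrinkable,
 * so the shrinker does not reclaim page tables that are still in use.
 */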
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	int err;

	err = i915_gem_object_pin_pages(obj);
	if (err)
		return err;

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

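/*
 * Called when the last open reference to the address space is dropped:
 * under vm->mutex, unpin and forcibly unbind every vma remaining on the
 * bound_list, taking a temporary object reference so the vma stays alive
 * while we destroy it.
 */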
void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}

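/* Final teardown of the address space's drm_mm range manager and mutex. */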
void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}

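/*
 * Deferred half of i915_vm_release(), run from the RCU work: invoke the
 * vm's cleanup callback, finalise the address space and free it.
 */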
static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}

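/*
 * kref release callback for a ppgtt; actual teardown is deferred to the
 * driver workqueue after an RCU grace period via queue_rcu_work().
 */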
void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}

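/*
 * Common initialisation shared by the GGTT and ppgtt: reference and open
 * counts, the reclaim-tainted vm->mutex, the drm_mm range manager and the
 * list of bound vma.
 */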
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}

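/*
 * Release the vma's page description: if the sg_table was constructed for
 * this vma rather than borrowed from the object (e.g. for a remapped or
 * partial view), free it, then reset the cached page-size information.
 */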
void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

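/*
 * Helpers returning the DMA address and struct page of the first segment
 * backing a page-table object; the object must already have its pages.
 */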
dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

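/*
 * Fill a page-table page with @count copies of the 64-bit @val through a
 * temporary kmap, flushing the CPU cache so the GPU sees the update.
 */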
void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	struct page *page = __px_page(p);
	void *vaddr;

	vaddr = kmap(page);
	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
	kunmap(page);
}

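/*
 * Scrub the scratch backing store: zeroed in production, filled with
 * POISON_FREE under CONFIG_DRM_I915_DEBUG_GEM so stray accesses through
 * scratch stand out in the error state.
 */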
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	struct sgt_iter sgt;
	struct page *page;
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	for_each_sgt_page(page, sgt, scratch->mm.pages) {
		void *vaddr;

		vaddr = kmap(page);
		memset(vaddr, val, PAGE_SIZE);
		kunmap(page);
	}
}

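/*
 * Allocate the level-0 scratch page, preferring a naturally aligned 64K
 * page when the vm is 4-level and the platform supports 64K GTT pages,
 * and falling back to a single 4K page if that cannot be satisfied.
 */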
int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (pin_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

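/* Drop the scratch object for every page-table level, 0..vm->top. */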
void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT related workarounds. It is called on
	 * driver load and after a GPU reset, so you can place workarounds
	 * here even if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages and not disabling
		 * the GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

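/*
 * Gen10/gen11 expose the PAT as individual per-index registers rather than
 * the packed register pair used below for BDW/CHV; program the same
 * WB/WC/WT/UC table, with LLC/eLLC and AGE attributes, one index at a time.
 */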
static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

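/*
 * Program the private PPAT for the running platform: the TGL table on
 * gen12+, the CNL per-index registers on gen10/11, the snoop-based table
 * on CHV and gen9-LP, and the BDW table on the remaining gen8/gen9
 * platforms. Only valid on gen8+.
 */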
void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(INTEL_GEN(i915) < 8);

	if (INTEL_GEN(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (INTEL_GEN(i915) >= 10)
		cnl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif