xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/i915/gt/intel_gtt.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: MIT
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright © 2020 Intel Corporation
4*4882a593Smuzhiyun  */
5*4882a593Smuzhiyun 
6*4882a593Smuzhiyun #include <linux/slab.h> /* fault-inject.h is not standalone! */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <linux/fault-inject.h>
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #include "i915_trace.h"
11*4882a593Smuzhiyun #include "intel_gt.h"
12*4882a593Smuzhiyun #include "intel_gtt.h"
13*4882a593Smuzhiyun 
alloc_pt_dma(struct i915_address_space * vm,int sz)14*4882a593Smuzhiyun struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
15*4882a593Smuzhiyun {
16*4882a593Smuzhiyun 	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
17*4882a593Smuzhiyun 		i915_gem_shrink_all(vm->i915);
18*4882a593Smuzhiyun 
19*4882a593Smuzhiyun 	return i915_gem_object_create_internal(vm->i915, sz);
20*4882a593Smuzhiyun }
21*4882a593Smuzhiyun 
/*
 * pin_pt_dma - pin the backing pages of @obj and mark it unshrinkable.
 *
 * @vm is accepted for interface symmetry with alloc_pt_dma() but is not
 * used here.
 *
 * Returns 0 on success, or the non-zero error reported by
 * i915_gem_object_pin_pages(); on error the object is left untouched.
 */
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	int ret = i915_gem_object_pin_pages(obj);

	if (!ret)
		i915_gem_object_make_unshrinkable(obj);

	return ret;
}
33*4882a593Smuzhiyun 
__i915_vm_close(struct i915_address_space * vm)34*4882a593Smuzhiyun void __i915_vm_close(struct i915_address_space *vm)
35*4882a593Smuzhiyun {
36*4882a593Smuzhiyun 	struct i915_vma *vma, *vn;
37*4882a593Smuzhiyun 
38*4882a593Smuzhiyun 	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
39*4882a593Smuzhiyun 		return;
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun 	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
42*4882a593Smuzhiyun 		struct drm_i915_gem_object *obj = vma->obj;
43*4882a593Smuzhiyun 
44*4882a593Smuzhiyun 		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
45*4882a593Smuzhiyun 		if (!kref_get_unless_zero(&obj->base.refcount))
46*4882a593Smuzhiyun 			continue;
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun 		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
49*4882a593Smuzhiyun 		WARN_ON(__i915_vma_unbind(vma));
50*4882a593Smuzhiyun 		__i915_vma_put(vma);
51*4882a593Smuzhiyun 
52*4882a593Smuzhiyun 		i915_gem_object_put(obj);
53*4882a593Smuzhiyun 	}
54*4882a593Smuzhiyun 	GEM_BUG_ON(!list_empty(&vm->bound_list));
55*4882a593Smuzhiyun 
56*4882a593Smuzhiyun 	mutex_unlock(&vm->mutex);
57*4882a593Smuzhiyun }
58*4882a593Smuzhiyun 
i915_address_space_fini(struct i915_address_space * vm)59*4882a593Smuzhiyun void i915_address_space_fini(struct i915_address_space *vm)
60*4882a593Smuzhiyun {
61*4882a593Smuzhiyun 	drm_mm_takedown(&vm->mm);
62*4882a593Smuzhiyun 	mutex_destroy(&vm->mutex);
63*4882a593Smuzhiyun }
64*4882a593Smuzhiyun 
__i915_vm_release(struct work_struct * work)65*4882a593Smuzhiyun static void __i915_vm_release(struct work_struct *work)
66*4882a593Smuzhiyun {
67*4882a593Smuzhiyun 	struct i915_address_space *vm =
68*4882a593Smuzhiyun 		container_of(work, struct i915_address_space, rcu.work);
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun 	vm->cleanup(vm);
71*4882a593Smuzhiyun 	i915_address_space_fini(vm);
72*4882a593Smuzhiyun 
73*4882a593Smuzhiyun 	kfree(vm);
74*4882a593Smuzhiyun }
75*4882a593Smuzhiyun 
i915_vm_release(struct kref * kref)76*4882a593Smuzhiyun void i915_vm_release(struct kref *kref)
77*4882a593Smuzhiyun {
78*4882a593Smuzhiyun 	struct i915_address_space *vm =
79*4882a593Smuzhiyun 		container_of(kref, struct i915_address_space, ref);
80*4882a593Smuzhiyun 
81*4882a593Smuzhiyun 	GEM_BUG_ON(i915_is_ggtt(vm));
82*4882a593Smuzhiyun 	trace_i915_ppgtt_release(vm);
83*4882a593Smuzhiyun 
84*4882a593Smuzhiyun 	queue_rcu_work(vm->i915->wq, &vm->rcu);
85*4882a593Smuzhiyun }
86*4882a593Smuzhiyun 
i915_address_space_init(struct i915_address_space * vm,int subclass)87*4882a593Smuzhiyun void i915_address_space_init(struct i915_address_space *vm, int subclass)
88*4882a593Smuzhiyun {
89*4882a593Smuzhiyun 	kref_init(&vm->ref);
90*4882a593Smuzhiyun 	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
91*4882a593Smuzhiyun 	atomic_set(&vm->open, 1);
92*4882a593Smuzhiyun 
93*4882a593Smuzhiyun 	/*
94*4882a593Smuzhiyun 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
95*4882a593Smuzhiyun 	 * Do a dummy acquire now under fs_reclaim so that any allocation
96*4882a593Smuzhiyun 	 * attempt holding the lock is immediately reported by lockdep.
97*4882a593Smuzhiyun 	 */
98*4882a593Smuzhiyun 	mutex_init(&vm->mutex);
99*4882a593Smuzhiyun 	lockdep_set_subclass(&vm->mutex, subclass);
100*4882a593Smuzhiyun 	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 	GEM_BUG_ON(!vm->total);
103*4882a593Smuzhiyun 	drm_mm_init(&vm->mm, 0, vm->total);
104*4882a593Smuzhiyun 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun 	INIT_LIST_HEAD(&vm->bound_list);
107*4882a593Smuzhiyun }
108*4882a593Smuzhiyun 
clear_pages(struct i915_vma * vma)109*4882a593Smuzhiyun void clear_pages(struct i915_vma *vma)
110*4882a593Smuzhiyun {
111*4882a593Smuzhiyun 	GEM_BUG_ON(!vma->pages);
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	if (vma->pages != vma->obj->mm.pages) {
114*4882a593Smuzhiyun 		sg_free_table(vma->pages);
115*4882a593Smuzhiyun 		kfree(vma->pages);
116*4882a593Smuzhiyun 	}
117*4882a593Smuzhiyun 	vma->pages = NULL;
118*4882a593Smuzhiyun 
119*4882a593Smuzhiyun 	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
120*4882a593Smuzhiyun }
121*4882a593Smuzhiyun 
__px_dma(struct drm_i915_gem_object * p)122*4882a593Smuzhiyun dma_addr_t __px_dma(struct drm_i915_gem_object *p)
123*4882a593Smuzhiyun {
124*4882a593Smuzhiyun 	GEM_BUG_ON(!i915_gem_object_has_pages(p));
125*4882a593Smuzhiyun 	return sg_dma_address(p->mm.pages->sgl);
126*4882a593Smuzhiyun }
127*4882a593Smuzhiyun 
__px_page(struct drm_i915_gem_object * p)128*4882a593Smuzhiyun struct page *__px_page(struct drm_i915_gem_object *p)
129*4882a593Smuzhiyun {
130*4882a593Smuzhiyun 	GEM_BUG_ON(!i915_gem_object_has_pages(p));
131*4882a593Smuzhiyun 	return sg_page(p->mm.pages->sgl);
132*4882a593Smuzhiyun }
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun void
fill_page_dma(struct drm_i915_gem_object * p,const u64 val,unsigned int count)135*4882a593Smuzhiyun fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
136*4882a593Smuzhiyun {
137*4882a593Smuzhiyun 	struct page *page = __px_page(p);
138*4882a593Smuzhiyun 	void *vaddr;
139*4882a593Smuzhiyun 
140*4882a593Smuzhiyun 	vaddr = kmap(page);
141*4882a593Smuzhiyun 	memset64(vaddr, val, count);
142*4882a593Smuzhiyun 	clflush_cache_range(vaddr, PAGE_SIZE);
143*4882a593Smuzhiyun 	kunmap(page);
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun 
poison_scratch_page(struct drm_i915_gem_object * scratch)146*4882a593Smuzhiyun static void poison_scratch_page(struct drm_i915_gem_object *scratch)
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun 	struct sgt_iter sgt;
149*4882a593Smuzhiyun 	struct page *page;
150*4882a593Smuzhiyun 	u8 val;
151*4882a593Smuzhiyun 
152*4882a593Smuzhiyun 	val = 0;
153*4882a593Smuzhiyun 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
154*4882a593Smuzhiyun 		val = POISON_FREE;
155*4882a593Smuzhiyun 
156*4882a593Smuzhiyun 	for_each_sgt_page(page, sgt, scratch->mm.pages) {
157*4882a593Smuzhiyun 		void *vaddr;
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun 		vaddr = kmap(page);
160*4882a593Smuzhiyun 		memset(vaddr, val, PAGE_SIZE);
161*4882a593Smuzhiyun 		kunmap(page);
162*4882a593Smuzhiyun 	}
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun 
setup_scratch_page(struct i915_address_space * vm)165*4882a593Smuzhiyun int setup_scratch_page(struct i915_address_space *vm)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun 	unsigned long size;
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 	/*
170*4882a593Smuzhiyun 	 * In order to utilize 64K pages for an object with a size < 2M, we will
171*4882a593Smuzhiyun 	 * need to support a 64K scratch page, given that every 16th entry for a
172*4882a593Smuzhiyun 	 * page-table operating in 64K mode must point to a properly aligned 64K
173*4882a593Smuzhiyun 	 * region, including any PTEs which happen to point to scratch.
174*4882a593Smuzhiyun 	 *
175*4882a593Smuzhiyun 	 * This is only relevant for the 48b PPGTT where we support
176*4882a593Smuzhiyun 	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
177*4882a593Smuzhiyun 	 * scratch (read-only) between all vm, we create one 64k scratch page
178*4882a593Smuzhiyun 	 * for all.
179*4882a593Smuzhiyun 	 */
180*4882a593Smuzhiyun 	size = I915_GTT_PAGE_SIZE_4K;
181*4882a593Smuzhiyun 	if (i915_vm_is_4lvl(vm) &&
182*4882a593Smuzhiyun 	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
183*4882a593Smuzhiyun 		size = I915_GTT_PAGE_SIZE_64K;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	do {
186*4882a593Smuzhiyun 		struct drm_i915_gem_object *obj;
187*4882a593Smuzhiyun 
188*4882a593Smuzhiyun 		obj = vm->alloc_pt_dma(vm, size);
189*4882a593Smuzhiyun 		if (IS_ERR(obj))
190*4882a593Smuzhiyun 			goto skip;
191*4882a593Smuzhiyun 
192*4882a593Smuzhiyun 		if (pin_pt_dma(vm, obj))
193*4882a593Smuzhiyun 			goto skip_obj;
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 		/* We need a single contiguous page for our scratch */
196*4882a593Smuzhiyun 		if (obj->mm.page_sizes.sg < size)
197*4882a593Smuzhiyun 			goto skip_obj;
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 		/* And it needs to be correspondingly aligned */
200*4882a593Smuzhiyun 		if (__px_dma(obj) & (size - 1))
201*4882a593Smuzhiyun 			goto skip_obj;
202*4882a593Smuzhiyun 
203*4882a593Smuzhiyun 		/*
204*4882a593Smuzhiyun 		 * Use a non-zero scratch page for debugging.
205*4882a593Smuzhiyun 		 *
206*4882a593Smuzhiyun 		 * We want a value that should be reasonably obvious
207*4882a593Smuzhiyun 		 * to spot in the error state, while also causing a GPU hang
208*4882a593Smuzhiyun 		 * if executed. We prefer using a clear page in production, so
209*4882a593Smuzhiyun 		 * should it ever be accidentally used, the effect should be
210*4882a593Smuzhiyun 		 * fairly benign.
211*4882a593Smuzhiyun 		 */
212*4882a593Smuzhiyun 		poison_scratch_page(obj);
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 		vm->scratch[0] = obj;
215*4882a593Smuzhiyun 		vm->scratch_order = get_order(size);
216*4882a593Smuzhiyun 		return 0;
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun skip_obj:
219*4882a593Smuzhiyun 		i915_gem_object_put(obj);
220*4882a593Smuzhiyun skip:
221*4882a593Smuzhiyun 		if (size == I915_GTT_PAGE_SIZE_4K)
222*4882a593Smuzhiyun 			return -ENOMEM;
223*4882a593Smuzhiyun 
224*4882a593Smuzhiyun 		size = I915_GTT_PAGE_SIZE_4K;
225*4882a593Smuzhiyun 	} while (1);
226*4882a593Smuzhiyun }
227*4882a593Smuzhiyun 
free_scratch(struct i915_address_space * vm)228*4882a593Smuzhiyun void free_scratch(struct i915_address_space *vm)
229*4882a593Smuzhiyun {
230*4882a593Smuzhiyun 	int i;
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun 	for (i = 0; i <= vm->top; i++)
233*4882a593Smuzhiyun 		i915_gem_object_put(vm->scratch[i]);
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun 
gtt_write_workarounds(struct intel_gt * gt)236*4882a593Smuzhiyun void gtt_write_workarounds(struct intel_gt *gt)
237*4882a593Smuzhiyun {
238*4882a593Smuzhiyun 	struct drm_i915_private *i915 = gt->i915;
239*4882a593Smuzhiyun 	struct intel_uncore *uncore = gt->uncore;
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun 	/*
242*4882a593Smuzhiyun 	 * This function is for gtt related workarounds. This function is
243*4882a593Smuzhiyun 	 * called on driver load and after a GPU reset, so you can place
244*4882a593Smuzhiyun 	 * workarounds here even if they get overwritten by GPU reset.
245*4882a593Smuzhiyun 	 */
246*4882a593Smuzhiyun 	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
247*4882a593Smuzhiyun 	if (IS_BROADWELL(i915))
248*4882a593Smuzhiyun 		intel_uncore_write(uncore,
249*4882a593Smuzhiyun 				   GEN8_L3_LRA_1_GPGPU,
250*4882a593Smuzhiyun 				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
251*4882a593Smuzhiyun 	else if (IS_CHERRYVIEW(i915))
252*4882a593Smuzhiyun 		intel_uncore_write(uncore,
253*4882a593Smuzhiyun 				   GEN8_L3_LRA_1_GPGPU,
254*4882a593Smuzhiyun 				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
255*4882a593Smuzhiyun 	else if (IS_GEN9_LP(i915))
256*4882a593Smuzhiyun 		intel_uncore_write(uncore,
257*4882a593Smuzhiyun 				   GEN8_L3_LRA_1_GPGPU,
258*4882a593Smuzhiyun 				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
259*4882a593Smuzhiyun 	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
260*4882a593Smuzhiyun 		intel_uncore_write(uncore,
261*4882a593Smuzhiyun 				   GEN8_L3_LRA_1_GPGPU,
262*4882a593Smuzhiyun 				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
263*4882a593Smuzhiyun 
264*4882a593Smuzhiyun 	/*
265*4882a593Smuzhiyun 	 * To support 64K PTEs we need to first enable the use of the
266*4882a593Smuzhiyun 	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
267*4882a593Smuzhiyun 	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
268*4882a593Smuzhiyun 	 * shouldn't be needed after GEN10.
269*4882a593Smuzhiyun 	 *
270*4882a593Smuzhiyun 	 * 64K pages were first introduced from BDW+, although technically they
271*4882a593Smuzhiyun 	 * only *work* from gen9+. For pre-BDW we instead have the option for
272*4882a593Smuzhiyun 	 * 32K pages, but we don't currently have any support for it in our
273*4882a593Smuzhiyun 	 * driver.
274*4882a593Smuzhiyun 	 */
275*4882a593Smuzhiyun 	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
276*4882a593Smuzhiyun 	    INTEL_GEN(i915) <= 10)
277*4882a593Smuzhiyun 		intel_uncore_rmw(uncore,
278*4882a593Smuzhiyun 				 GEN8_GAMW_ECO_DEV_RW_IA,
279*4882a593Smuzhiyun 				 0,
280*4882a593Smuzhiyun 				 GAMW_ECO_ENABLE_64K_IPS_FIELD);
281*4882a593Smuzhiyun 
282*4882a593Smuzhiyun 	if (IS_GEN_RANGE(i915, 8, 11)) {
283*4882a593Smuzhiyun 		bool can_use_gtt_cache = true;
284*4882a593Smuzhiyun 
285*4882a593Smuzhiyun 		/*
286*4882a593Smuzhiyun 		 * According to the BSpec if we use 2M/1G pages then we also
287*4882a593Smuzhiyun 		 * need to disable the GTT cache. At least on BDW we can see
288*4882a593Smuzhiyun 		 * visual corruption when using 2M pages, and not disabling the
289*4882a593Smuzhiyun 		 * GTT cache.
290*4882a593Smuzhiyun 		 */
291*4882a593Smuzhiyun 		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
292*4882a593Smuzhiyun 			can_use_gtt_cache = false;
293*4882a593Smuzhiyun 
294*4882a593Smuzhiyun 		/* WaGttCachingOffByDefault */
295*4882a593Smuzhiyun 		intel_uncore_write(uncore,
296*4882a593Smuzhiyun 				   HSW_GTT_CACHE_EN,
297*4882a593Smuzhiyun 				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
298*4882a593Smuzhiyun 		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
299*4882a593Smuzhiyun 				 intel_uncore_read(uncore,
300*4882a593Smuzhiyun 						   HSW_GTT_CACHE_EN) == 0);
301*4882a593Smuzhiyun 	}
302*4882a593Smuzhiyun }
303*4882a593Smuzhiyun 
tgl_setup_private_ppat(struct intel_uncore * uncore)304*4882a593Smuzhiyun static void tgl_setup_private_ppat(struct intel_uncore *uncore)
305*4882a593Smuzhiyun {
306*4882a593Smuzhiyun 	/* TGL doesn't support LLC or AGE settings */
307*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
308*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
309*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
310*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
311*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
312*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
313*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
314*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
315*4882a593Smuzhiyun }
316*4882a593Smuzhiyun 
cnl_setup_private_ppat(struct intel_uncore * uncore)317*4882a593Smuzhiyun static void cnl_setup_private_ppat(struct intel_uncore *uncore)
318*4882a593Smuzhiyun {
319*4882a593Smuzhiyun 	intel_uncore_write(uncore,
320*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(0),
321*4882a593Smuzhiyun 			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
322*4882a593Smuzhiyun 	intel_uncore_write(uncore,
323*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(1),
324*4882a593Smuzhiyun 			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
325*4882a593Smuzhiyun 	intel_uncore_write(uncore,
326*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(2),
327*4882a593Smuzhiyun 			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
328*4882a593Smuzhiyun 	intel_uncore_write(uncore,
329*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(3),
330*4882a593Smuzhiyun 			   GEN8_PPAT_UC);
331*4882a593Smuzhiyun 	intel_uncore_write(uncore,
332*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(4),
333*4882a593Smuzhiyun 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
334*4882a593Smuzhiyun 	intel_uncore_write(uncore,
335*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(5),
336*4882a593Smuzhiyun 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
337*4882a593Smuzhiyun 	intel_uncore_write(uncore,
338*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(6),
339*4882a593Smuzhiyun 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
340*4882a593Smuzhiyun 	intel_uncore_write(uncore,
341*4882a593Smuzhiyun 			   GEN10_PAT_INDEX(7),
342*4882a593Smuzhiyun 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
343*4882a593Smuzhiyun }
344*4882a593Smuzhiyun 
345*4882a593Smuzhiyun /*
346*4882a593Smuzhiyun  * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
347*4882a593Smuzhiyun  * bits. When using advanced contexts each context stores its own PAT, but
348*4882a593Smuzhiyun  * writing this data shouldn't be harmful even in those cases.
349*4882a593Smuzhiyun  */
bdw_setup_private_ppat(struct intel_uncore * uncore)350*4882a593Smuzhiyun static void bdw_setup_private_ppat(struct intel_uncore *uncore)
351*4882a593Smuzhiyun {
352*4882a593Smuzhiyun 	u64 pat;
353*4882a593Smuzhiyun 
354*4882a593Smuzhiyun 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
355*4882a593Smuzhiyun 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
356*4882a593Smuzhiyun 	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
357*4882a593Smuzhiyun 	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
358*4882a593Smuzhiyun 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
359*4882a593Smuzhiyun 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
360*4882a593Smuzhiyun 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
361*4882a593Smuzhiyun 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
364*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
365*4882a593Smuzhiyun }
366*4882a593Smuzhiyun 
chv_setup_private_ppat(struct intel_uncore * uncore)367*4882a593Smuzhiyun static void chv_setup_private_ppat(struct intel_uncore *uncore)
368*4882a593Smuzhiyun {
369*4882a593Smuzhiyun 	u64 pat;
370*4882a593Smuzhiyun 
371*4882a593Smuzhiyun 	/*
372*4882a593Smuzhiyun 	 * Map WB on BDW to snooped on CHV.
373*4882a593Smuzhiyun 	 *
374*4882a593Smuzhiyun 	 * Only the snoop bit has meaning for CHV, the rest is
375*4882a593Smuzhiyun 	 * ignored.
376*4882a593Smuzhiyun 	 *
377*4882a593Smuzhiyun 	 * The hardware will never snoop for certain types of accesses:
378*4882a593Smuzhiyun 	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
379*4882a593Smuzhiyun 	 * - PPGTT page tables
380*4882a593Smuzhiyun 	 * - some other special cycles
381*4882a593Smuzhiyun 	 *
382*4882a593Smuzhiyun 	 * As with BDW, we also need to consider the following for GT accesses:
383*4882a593Smuzhiyun 	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
384*4882a593Smuzhiyun 	 * so RTL will always use the value corresponding to
385*4882a593Smuzhiyun 	 * pat_sel = 000".
386*4882a593Smuzhiyun 	 * Which means we must set the snoop bit in PAT entry 0
387*4882a593Smuzhiyun 	 * in order to keep the global status page working.
388*4882a593Smuzhiyun 	 */
389*4882a593Smuzhiyun 
390*4882a593Smuzhiyun 	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
391*4882a593Smuzhiyun 	      GEN8_PPAT(1, 0) |
392*4882a593Smuzhiyun 	      GEN8_PPAT(2, 0) |
393*4882a593Smuzhiyun 	      GEN8_PPAT(3, 0) |
394*4882a593Smuzhiyun 	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
395*4882a593Smuzhiyun 	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
396*4882a593Smuzhiyun 	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
397*4882a593Smuzhiyun 	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
398*4882a593Smuzhiyun 
399*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
400*4882a593Smuzhiyun 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
401*4882a593Smuzhiyun }
402*4882a593Smuzhiyun 
setup_private_pat(struct intel_uncore * uncore)403*4882a593Smuzhiyun void setup_private_pat(struct intel_uncore *uncore)
404*4882a593Smuzhiyun {
405*4882a593Smuzhiyun 	struct drm_i915_private *i915 = uncore->i915;
406*4882a593Smuzhiyun 
407*4882a593Smuzhiyun 	GEM_BUG_ON(INTEL_GEN(i915) < 8);
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	if (INTEL_GEN(i915) >= 12)
410*4882a593Smuzhiyun 		tgl_setup_private_ppat(uncore);
411*4882a593Smuzhiyun 	else if (INTEL_GEN(i915) >= 10)
412*4882a593Smuzhiyun 		cnl_setup_private_ppat(uncore);
413*4882a593Smuzhiyun 	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
414*4882a593Smuzhiyun 		chv_setup_private_ppat(uncore);
415*4882a593Smuzhiyun 	else
416*4882a593Smuzhiyun 		bdw_setup_private_ppat(uncore);
417*4882a593Smuzhiyun }
418*4882a593Smuzhiyun 
419*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
420*4882a593Smuzhiyun #include "selftests/mock_gtt.c"
421*4882a593Smuzhiyun #endif
422