/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/io-64-nonatomic-lo-hi.h>

#include "amdgpu.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/**
 * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
 *
 * @bo: the BO to get the PDE for
 * @level: the level in the PD hierarchy
 * @addr: resulting addr
 * @flags: resulting flags
 *
 * Get the address and flags to be used for a PDE (Page Directory Entry).
 */
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
			       uint64_t *addr, uint64_t *flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_dma_tt *ttm;

	switch (bo->tbo.mem.mem_type) {
	case TTM_PL_TT:
		ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
		*addr = ttm->dma_address[0];
		break;
	case TTM_PL_VRAM:
		*addr = amdgpu_bo_gpu_offset(bo);
		break;
	default:
		*addr = 0;
		break;
	}
	*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}

/**
 * amdgpu_gmc_pd_addr - return the address of the root directory
 *
 * @bo: page directory BO
 *
 * Returns:
 * The address of the root page directory, including the PDE flags on
 * ASICs that need them.
 */
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint64_t pd_addr;

	/* TODO: move that into ASIC specific code */
	if (adev->asic_type >= CHIP_VEGA10) {
		uint64_t flags = AMDGPU_PTE_VALID;

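		/* Build the root PD address like a regular PDE: take the
		 * BO address, look up the PDE flags and OR them into the
		 * address before returning it.
		 */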
		amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
		pd_addr |= flags;
	} else {
		pd_addr = amdgpu_bo_gpu_offset(bo);
	}
	return pd_addr;
}

/**
 * amdgpu_gmc_set_pte_pde - update the page tables using CPU
 *
 * @adev: amdgpu_device pointer
 * @cpu_pt_addr: cpu address of the page table
 * @gpu_page_idx: entry in the page table to update
 * @addr: dst addr to write into pte/pde
 * @flags: access flags
 *
 * Update the page tables using CPU.
 */
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
			   uint32_t gpu_page_idx, uint64_t addr,
			   uint64_t flags)
{
	void __iomem *ptr = (void *)cpu_pt_addr;
	uint64_t value;

	/*
	 * The following is for PTE only. GART does not have PDEs.
	 */
	value = addr & 0x0000FFFFFFFFF000ULL;
	value |= flags;
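	/* 64-bit MMIO write; on platforms without native 64-bit MMIO this
	 * falls back to two 32-bit writes via io-64-nonatomic-lo-hi.h.
	 */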
	writeq(value, ptr + (gpu_page_idx * 8));
	return 0;
}

/**
 * amdgpu_gmc_agp_addr - return the address in the AGP address space
 *
 * @bo: TTM BO which needs the address, must be in GTT domain
 *
 * Tries to figure out how to access the BO through the AGP aperture. Returns
 * AMDGPU_BO_INVALID_OFFSET if that is not possible.
 */
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_dma_tt *ttm;

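	/* Only a single, uncached page can be accessed through the AGP
	 * aperture, and only if its DMA address fits below the aperture
	 * size.
	 */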
	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
		return AMDGPU_BO_INVALID_OFFSET;

	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
	if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
		return AMDGPU_BO_INVALID_OFFSET;

	return adev->gmc.agp_start + ttm->dma_address[0];
}

/**
 * amdgpu_gmc_vram_location - try to find VRAM location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @base: base address at which to put VRAM
 *
 * Function will try to place VRAM at base address provided
 * as parameter.
 */
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
			      u64 base)
{
	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;

	mc->vram_start = base;
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
	if (limit && limit < mc->real_vram_size)
		mc->real_vram_size = limit;

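	/* Without XGMI the visible FB aperture simply covers the local
	 * VRAM; in a hive fb_start/fb_end are set up from the hive layout
	 * instead.
	 */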
	if (mc->xgmi.num_physical_nodes == 0) {
		mc->fb_start = mc->vram_start;
		mc->fb_end = mc->vram_end;
	}
	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
		 mc->mc_vram_size >> 20, mc->vram_start,
		 mc->vram_end, mc->real_vram_size >> 20);
}

/**
 * amdgpu_gmc_gart_location - try to find GART location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to place GART before or after VRAM.
 *
 * If the GART size is bigger than the space left, then we adjust the GART
 * size. Thus the function will never fail.
 */
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t four_gb = 0x100000000ULL;
	u64 size_af, size_bf;
	/* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);

	mc->gart_size += adev->pm.smu_prv_buffer_size;

	/* VCE doesn't like it when BOs cross a 4GB segment, so align
	 * the GART base on a 4GB boundary as well.
	 */
	size_bf = mc->fb_start;
	size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);

	if (mc->gart_size > max(size_bf, size_af)) {
		dev_warn(adev->dev, "limiting GART\n");
		mc->gart_size = max(size_bf, size_af);
	}

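	/* Place the GART in the smaller of the two regions that can still
	 * hold it; if the region above the FB is too small, fall back to
	 * the region below.
	 */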
	if ((size_bf >= mc->gart_size && size_bf < size_af) ||
	    (size_af < mc->gart_size))
		mc->gart_start = 0;
	else
		mc->gart_start = max_mc_address - mc->gart_size + 1;

	mc->gart_start &= ~(four_gb - 1);
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
		 mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}

/**
 * amdgpu_gmc_agp_location - try to find AGP location
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to find a place for the AGP BAR in the MC address
 * space.
 *
 * AGP BAR will be assigned the largest available hole in the address space.
 * Should be called after VRAM and GART locations are setup.
 */
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t sixteen_gb = 1ULL << 34;
	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
	u64 size_af, size_bf;

	if (amdgpu_sriov_vf(adev)) {
		mc->agp_start = 0xffffffffffff;
		mc->agp_end = 0x0;
		mc->agp_size = 0;

		return;
	}

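	/* Measure the 16GB-aligned free space below and above the FB and
	 * GART regions, depending on how the two are ordered.
	 */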
	if (mc->fb_start > mc->gart_start) {
		size_bf = (mc->fb_start & sixteen_gb_mask) -
			ALIGN(mc->gart_end + 1, sixteen_gb);
		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
	} else {
		size_bf = mc->fb_start & sixteen_gb_mask;
		size_af = (mc->gart_start & sixteen_gb_mask) -
			ALIGN(mc->fb_end + 1, sixteen_gb);
	}

	if (size_bf > size_af) {
		mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
		mc->agp_size = size_bf;
	} else {
		mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
		mc->agp_size = size_af;
	}

	mc->agp_end = mc->agp_start + mc->agp_size - 1;
	dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
		 mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}

/**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
 * @adev: amdgpu device structure
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 * @timestamp: timestamp of the fault
 *
 * Returns:
 * True if the fault was filtered and should not be processed further.
 * False if the fault is a new one and needs to be handled.
 */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
			      uint16_t pasid, uint64_t timestamp)
{
	struct amdgpu_gmc *gmc = &adev->gmc;

	uint64_t stamp, key = addr << 4 | pasid;
	struct amdgpu_gmc_fault *fault;
	uint32_t hash;

	/* If we don't have space left in the ring buffer return immediately */
	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
		AMDGPU_GMC_FAULT_TIMEOUT;
	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
		return true;

	/* Try to find the fault in the hash */
	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
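	/* Walk the chain of recent faults sharing this hash bucket; stop
	 * when entries are older than the timeout or the ring slot was
	 * reused.
	 */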
	while (fault->timestamp >= stamp) {
		uint64_t tmp;

		if (fault->key == key)
			return true;

		tmp = fault->timestamp;
		fault = &gmc->fault_ring[fault->next];

		/* Check if the entry was reused */
		if (fault->timestamp >= tmp)
			break;
	}

	/* Add the fault to the ring */
	fault = &gmc->fault_ring[gmc->last_fault];
	fault->key = key;
	fault->timestamp = timestamp;

	/* And update the hash */
	fault->next = gmc->fault_hash[hash].idx;
	gmc->fault_hash[hash].idx = gmc->last_fault++;
	return false;
}

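/**
 * amdgpu_gmc_ras_late_init - late RAS initialization for the memory blocks
 *
 * @adev: amdgpu_device pointer
 *
 * Performs late RAS initialization for the UMC, MMHUB and XGMI blocks,
 * calling their ras_late_init callbacks where implemented.
 *
 * Returns 0 on success or a negative error code on failure.
 */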
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
		r = adev->umc.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
		r = adev->mmhub.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	return amdgpu_xgmi_ras_late_init(adev);
}

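/**
 * amdgpu_gmc_ras_fini - tear down RAS handling for the memory blocks
 *
 * @adev: amdgpu_device pointer
 *
 * Counterpart of amdgpu_gmc_ras_late_init(); finalizes RAS handling for
 * the UMC, MMHUB and XGMI blocks.
 */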
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
	amdgpu_umc_ras_fini(adev);
	amdgpu_mmhub_ras_fini(adev);
	amdgpu_xgmi_ras_fini(adev);
}

/*
 * The latest engine allocation on gfx9/10 is:
 * Engine 2, 3: firmware
 * Engine 0, 1, 4~16: amdgpu ring,
 * subject to change when ring number changes
 * Engine 17: Gart flushes
 */
#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3

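/**
 * amdgpu_gmc_allocate_vm_inv_eng - allocate VM invalidation engines
 *
 * @adev: amdgpu_device pointer
 *
 * Assigns each ring the lowest VM invalidation engine still free on its
 * VM hub, based on the free-engine bitmaps above.
 *
 * Returns 0 on success or -EINVAL if a hub runs out of engines.
 */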
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
		 GFXHUB_FREE_VM_INV_ENGS_BITMAP};
	unsigned i;
	unsigned vmhub, inv_eng;

	for (i = 0; i < adev->num_rings; ++i) {
		ring = adev->rings[i];
		vmhub = ring->funcs->vmhub;

		if (ring == &adev->mes.ring)
			continue;

		inv_eng = ffs(vm_inv_engs[vmhub]);
		if (!inv_eng) {
			dev_err(adev->dev, "no VM inv eng for ring %s\n",
				ring->name);
			return -EINVAL;
		}

		ring->vm_inv_eng = inv_eng - 1;
		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);

		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
	}

	return 0;
}

/**
 * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
 * @adev: amdgpu_device pointer
 *
 * Check and set if the device @adev supports Trusted Memory
 * Zones (TMZ).
 */
void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_RAVEN:
	case CHIP_RENOIR:
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		/* Don't enable it by default yet.
		 */
		if (amdgpu_tmz < 1) {
			adev->gmc.tmz_enabled = false;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
		} else {
			adev->gmc.tmz_enabled = true;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
		}
		break;
	default:
		adev->gmc.tmz_enabled = false;
		dev_warn(adev->dev,
			 "Trusted Memory Zone (TMZ) feature not supported\n");
		break;
	}
}

/**
 * amdgpu_gmc_noretry_set -- set per asic noretry defaults
 * @adev: amdgpu_device pointer
 *
 * Set a per asic default for the no-retry parameter.
 *
 */
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
	struct amdgpu_gmc *gmc = &adev->gmc;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		/* Raven currently has issues with noretry.
		 * Regardless of what we decide for other
		 * asics, we should leave raven with
		 * noretry = 0 until we root cause the
		 * issues.
		 */
		if (amdgpu_noretry == -1)
			gmc->noretry = 0;
		else
			gmc->noretry = amdgpu_noretry;
		break;
	default:
		/* default this to 0 for now, but we may want
		 * to change this in the future for certain
		 * GPUs as it can increase performance in
		 * certain cases.
		 */
		if (amdgpu_noretry == -1)
			gmc->noretry = 0;
		else
			gmc->noretry = amdgpu_noretry;
		break;
	}
}

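/**
 * amdgpu_gmc_set_vm_fault_masks - set the per-context VM fault handling bits
 *
 * @adev: amdgpu_device pointer
 * @hub_type: which VM hub to program
 * @enable: enable or disable the fault handling bits
 *
 * Sets or clears the VM fault control bits in the context control
 * registers of all 16 VM contexts of the given hub.
 */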
void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
				   bool enable)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, i;

	hub = &adev->vmhub[hub_type];
	for (i = 0; i < 16; i++) {
		reg = hub->vm_context0_cntl + hub->ctx_distance * i;

		tmp = RREG32(reg);
		if (enable)
			tmp |= hub->vm_cntx_cntl_vm_fault;
		else
			tmp &= ~hub->vm_cntx_cntl_vm_fault;

		WREG32(reg, tmp);
	}
}

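/**
 * amdgpu_gmc_get_vbios_allocations - get the VRAM reserved by the vbios
 *
 * @adev: amdgpu_device pointer
 *
 * Determines how much VRAM the pre-OS console / vbios framebuffer still
 * occupies and records the resulting stolen memory reservations in
 * adev->mman.
 */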
void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
{
	unsigned size;

	/*
	 * TODO:
	 * Currently there is a bug where some memory client outside
	 * of the driver writes to first 8M of VRAM on S3 resume,
	 * this overrides GART which by default gets placed in first 8M and
	 * causes VM_FAULTS once GTT is accessed.
	 * Keep the stolen memory reservation until this is solved.
	 */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_RAVEN:
	case CHIP_RENOIR:
		adev->mman.keep_stolen_vga_memory = true;
		break;
	default:
		adev->mman.keep_stolen_vga_memory = false;
		break;
	}

	if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
		size = 0;
	} else {
		size = amdgpu_gmc_get_vbios_fb_size(adev);

		if (adev->mman.keep_stolen_vga_memory)
			size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
	}

	/* set to 0 if the pre-OS buffer uses up most of vram */
	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
		size = 0;

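	/* Split the reservation: the first part stays reserved as the VGA
	 * allocation, the remainder is tracked as the extended pre-OS
	 * buffer.
	 */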
	if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
		adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
		adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
	} else {
		adev->mman.stolen_vga_size = size;
		adev->mman.stolen_extended_size = 0;
	}
}