1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun * Copyright(c) 2011-2015 Intel Corporation. All rights reserved.
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * Permission is hereby granted, free of charge, to any person obtaining a
5*4882a593Smuzhiyun * copy of this software and associated documentation files (the "Software"),
6*4882a593Smuzhiyun * to deal in the Software without restriction, including without limitation
7*4882a593Smuzhiyun * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*4882a593Smuzhiyun * and/or sell copies of the Software, and to permit persons to whom the
9*4882a593Smuzhiyun * Software is furnished to do so, subject to the following conditions:
10*4882a593Smuzhiyun *
11*4882a593Smuzhiyun * The above copyright notice and this permission notice (including the next
12*4882a593Smuzhiyun * paragraph) shall be included in all copies or substantial portions of the
13*4882a593Smuzhiyun * Software.
14*4882a593Smuzhiyun *
15*4882a593Smuzhiyun * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*4882a593Smuzhiyun * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*4882a593Smuzhiyun * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18*4882a593Smuzhiyun * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*4882a593Smuzhiyun * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20*4882a593Smuzhiyun * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21*4882a593Smuzhiyun * SOFTWARE.
22*4882a593Smuzhiyun */
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun #include "i915_drv.h"
25*4882a593Smuzhiyun #include "i915_pvinfo.h"
26*4882a593Smuzhiyun #include "i915_vgpu.h"
27*4882a593Smuzhiyun
/**
 * DOC: Intel GVT-g guest support
 *
 * Intel GVT-g is a graphics virtualization technology which shares the
 * GPU among multiple virtual machines on a time-sharing basis. Each
 * virtual machine is presented with a virtual GPU (vGPU), which has
 * features equivalent to those of the underlying physical GPU (pGPU), so
 * the i915 driver can run seamlessly in a virtual machine. This file
 * provides vGPU specific optimizations when running in a virtual machine,
 * to reduce the complexity of vGPU emulation and to improve the overall
 * performance.
 *
 * A primary function introduced here is the so-called "address space
 * ballooning" technique. Intel GVT-g partitions global graphics memory
 * among multiple VMs, so each VM can directly access a portion of the
 * memory without the hypervisor's intervention, e.g. filling textures or
 * queuing commands. However, with the partitioning, an unmodified i915
 * driver would assume a smaller graphics memory starting from address ZERO,
 * which would then require the vGPU emulation module to translate the
 * graphics address between 'guest view' and 'host view', for all registers
 * and command opcodes which contain a graphics memory address. To reduce
 * the complexity, Intel GVT-g introduces "address space ballooning", by
 * telling the exact partitioning knowledge to each guest i915 driver, which
 * then reserves the non-allocated portions and prevents them from being
 * allocated. Thus the vGPU emulation module only needs to scan and validate
 * graphics addresses, without the complexity of address translation.
 *
 */
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun /**
56*4882a593Smuzhiyun * intel_vgpu_detect - detect virtual GPU
57*4882a593Smuzhiyun * @dev_priv: i915 device private
58*4882a593Smuzhiyun *
59*4882a593Smuzhiyun * This function is called at the initialization stage, to detect whether
60*4882a593Smuzhiyun * running on a vGPU.
61*4882a593Smuzhiyun */
intel_vgpu_detect(struct drm_i915_private * dev_priv)62*4882a593Smuzhiyun void intel_vgpu_detect(struct drm_i915_private *dev_priv)
63*4882a593Smuzhiyun {
64*4882a593Smuzhiyun struct pci_dev *pdev = dev_priv->drm.pdev;
65*4882a593Smuzhiyun u64 magic;
66*4882a593Smuzhiyun u16 version_major;
67*4882a593Smuzhiyun void __iomem *shared_area;
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun /*
72*4882a593Smuzhiyun * This is called before we setup the main MMIO BAR mappings used via
73*4882a593Smuzhiyun * the uncore structure, so we need to access the BAR directly. Since
74*4882a593Smuzhiyun * we do not support VGT on older gens, return early so we don't have
75*4882a593Smuzhiyun * to consider differently numbered or sized MMIO bars
76*4882a593Smuzhiyun */
77*4882a593Smuzhiyun if (INTEL_GEN(dev_priv) < 6)
78*4882a593Smuzhiyun return;
79*4882a593Smuzhiyun
80*4882a593Smuzhiyun shared_area = pci_iomap_range(pdev, 0, VGT_PVINFO_PAGE, VGT_PVINFO_SIZE);
81*4882a593Smuzhiyun if (!shared_area) {
82*4882a593Smuzhiyun drm_err(&dev_priv->drm,
83*4882a593Smuzhiyun "failed to map MMIO bar to check for VGT\n");
84*4882a593Smuzhiyun return;
85*4882a593Smuzhiyun }
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun magic = readq(shared_area + vgtif_offset(magic));
88*4882a593Smuzhiyun if (magic != VGT_MAGIC)
89*4882a593Smuzhiyun goto out;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun version_major = readw(shared_area + vgtif_offset(version_major));
92*4882a593Smuzhiyun if (version_major < VGT_VERSION_MAJOR) {
93*4882a593Smuzhiyun drm_info(&dev_priv->drm, "VGT interface version mismatch!\n");
94*4882a593Smuzhiyun goto out;
95*4882a593Smuzhiyun }
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun dev_priv->vgpu.caps = readl(shared_area + vgtif_offset(vgt_caps));
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun dev_priv->vgpu.active = true;
100*4882a593Smuzhiyun mutex_init(&dev_priv->vgpu.lock);
101*4882a593Smuzhiyun drm_info(&dev_priv->drm, "Virtual GPU for Intel GVT-g detected.\n");
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun out:
104*4882a593Smuzhiyun pci_iounmap(pdev, shared_area);
105*4882a593Smuzhiyun }
106*4882a593Smuzhiyun
intel_vgpu_register(struct drm_i915_private * i915)107*4882a593Smuzhiyun void intel_vgpu_register(struct drm_i915_private *i915)
108*4882a593Smuzhiyun {
109*4882a593Smuzhiyun /*
110*4882a593Smuzhiyun * Notify a valid surface after modesetting, when running inside a VM.
111*4882a593Smuzhiyun */
112*4882a593Smuzhiyun if (intel_vgpu_active(i915))
113*4882a593Smuzhiyun intel_uncore_write(&i915->uncore, vgtif_reg(display_ready),
114*4882a593Smuzhiyun VGT_DRV_DISPLAY_READY);
115*4882a593Smuzhiyun }
116*4882a593Smuzhiyun
intel_vgpu_active(struct drm_i915_private * dev_priv)117*4882a593Smuzhiyun bool intel_vgpu_active(struct drm_i915_private *dev_priv)
118*4882a593Smuzhiyun {
119*4882a593Smuzhiyun return dev_priv->vgpu.active;
120*4882a593Smuzhiyun }
121*4882a593Smuzhiyun
intel_vgpu_has_full_ppgtt(struct drm_i915_private * dev_priv)122*4882a593Smuzhiyun bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *dev_priv)
123*4882a593Smuzhiyun {
124*4882a593Smuzhiyun return dev_priv->vgpu.caps & VGT_CAPS_FULL_PPGTT;
125*4882a593Smuzhiyun }
126*4882a593Smuzhiyun
intel_vgpu_has_hwsp_emulation(struct drm_i915_private * dev_priv)127*4882a593Smuzhiyun bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv)
128*4882a593Smuzhiyun {
129*4882a593Smuzhiyun return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION;
130*4882a593Smuzhiyun }
131*4882a593Smuzhiyun
intel_vgpu_has_huge_gtt(struct drm_i915_private * dev_priv)132*4882a593Smuzhiyun bool intel_vgpu_has_huge_gtt(struct drm_i915_private *dev_priv)
133*4882a593Smuzhiyun {
134*4882a593Smuzhiyun return dev_priv->vgpu.caps & VGT_CAPS_HUGE_GTT;
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun struct _balloon_info_ {
138*4882a593Smuzhiyun /*
139*4882a593Smuzhiyun * There are up to 2 regions per mappable/unmappable graphic
140*4882a593Smuzhiyun * memory that might be ballooned. Here, index 0/1 is for mappable
141*4882a593Smuzhiyun * graphic memory, 2/3 for unmappable graphic memory.
142*4882a593Smuzhiyun */
143*4882a593Smuzhiyun struct drm_mm_node space[4];
144*4882a593Smuzhiyun };
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun static struct _balloon_info_ bl_info;
147*4882a593Smuzhiyun
vgt_deballoon_space(struct i915_ggtt * ggtt,struct drm_mm_node * node)148*4882a593Smuzhiyun static void vgt_deballoon_space(struct i915_ggtt *ggtt,
149*4882a593Smuzhiyun struct drm_mm_node *node)
150*4882a593Smuzhiyun {
151*4882a593Smuzhiyun struct drm_i915_private *dev_priv = ggtt->vm.i915;
152*4882a593Smuzhiyun if (!drm_mm_node_allocated(node))
153*4882a593Smuzhiyun return;
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun drm_dbg(&dev_priv->drm,
156*4882a593Smuzhiyun "deballoon space: range [0x%llx - 0x%llx] %llu KiB.\n",
157*4882a593Smuzhiyun node->start,
158*4882a593Smuzhiyun node->start + node->size,
159*4882a593Smuzhiyun node->size / 1024);
160*4882a593Smuzhiyun
161*4882a593Smuzhiyun ggtt->vm.reserved -= node->size;
162*4882a593Smuzhiyun drm_mm_remove_node(node);
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun
165*4882a593Smuzhiyun /**
166*4882a593Smuzhiyun * intel_vgt_deballoon - deballoon reserved graphics address trunks
167*4882a593Smuzhiyun * @ggtt: the global GGTT from which we reserved earlier
168*4882a593Smuzhiyun *
169*4882a593Smuzhiyun * This function is called to deallocate the ballooned-out graphic memory, when
170*4882a593Smuzhiyun * driver is unloaded or when ballooning fails.
171*4882a593Smuzhiyun */
intel_vgt_deballoon(struct i915_ggtt * ggtt)172*4882a593Smuzhiyun void intel_vgt_deballoon(struct i915_ggtt *ggtt)
173*4882a593Smuzhiyun {
174*4882a593Smuzhiyun struct drm_i915_private *dev_priv = ggtt->vm.i915;
175*4882a593Smuzhiyun int i;
176*4882a593Smuzhiyun
177*4882a593Smuzhiyun if (!intel_vgpu_active(ggtt->vm.i915))
178*4882a593Smuzhiyun return;
179*4882a593Smuzhiyun
180*4882a593Smuzhiyun drm_dbg(&dev_priv->drm, "VGT deballoon.\n");
181*4882a593Smuzhiyun
182*4882a593Smuzhiyun for (i = 0; i < 4; i++)
183*4882a593Smuzhiyun vgt_deballoon_space(ggtt, &bl_info.space[i]);
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun
vgt_balloon_space(struct i915_ggtt * ggtt,struct drm_mm_node * node,unsigned long start,unsigned long end)186*4882a593Smuzhiyun static int vgt_balloon_space(struct i915_ggtt *ggtt,
187*4882a593Smuzhiyun struct drm_mm_node *node,
188*4882a593Smuzhiyun unsigned long start, unsigned long end)
189*4882a593Smuzhiyun {
190*4882a593Smuzhiyun struct drm_i915_private *dev_priv = ggtt->vm.i915;
191*4882a593Smuzhiyun unsigned long size = end - start;
192*4882a593Smuzhiyun int ret;
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun if (start >= end)
195*4882a593Smuzhiyun return -EINVAL;
196*4882a593Smuzhiyun
197*4882a593Smuzhiyun drm_info(&dev_priv->drm,
198*4882a593Smuzhiyun "balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n",
199*4882a593Smuzhiyun start, end, size / 1024);
200*4882a593Smuzhiyun ret = i915_gem_gtt_reserve(&ggtt->vm, node,
201*4882a593Smuzhiyun size, start, I915_COLOR_UNEVICTABLE,
202*4882a593Smuzhiyun 0);
203*4882a593Smuzhiyun if (!ret)
204*4882a593Smuzhiyun ggtt->vm.reserved += size;
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun return ret;
207*4882a593Smuzhiyun }
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun /**
210*4882a593Smuzhiyun * intel_vgt_balloon - balloon out reserved graphics address trunks
211*4882a593Smuzhiyun * @ggtt: the global GGTT from which to reserve
212*4882a593Smuzhiyun *
213*4882a593Smuzhiyun * This function is called at the initialization stage, to balloon out the
214*4882a593Smuzhiyun * graphic address space allocated to other vGPUs, by marking these spaces as
215*4882a593Smuzhiyun * reserved. The ballooning related knowledge(starting address and size of
216*4882a593Smuzhiyun * the mappable/unmappable graphic memory) is described in the vgt_if structure
217*4882a593Smuzhiyun * in a reserved mmio range.
218*4882a593Smuzhiyun *
219*4882a593Smuzhiyun * To give an example, the drawing below depicts one typical scenario after
220*4882a593Smuzhiyun * ballooning. Here the vGPU1 has 2 pieces of graphic address spaces ballooned
221*4882a593Smuzhiyun * out each for the mappable and the non-mappable part. From the vGPU1 point of
222*4882a593Smuzhiyun * view, the total size is the same as the physical one, with the start address
223*4882a593Smuzhiyun * of its graphic space being zero. Yet there are some portions ballooned out(
224*4882a593Smuzhiyun * the shadow part, which are marked as reserved by drm allocator). From the
225*4882a593Smuzhiyun * host point of view, the graphic address space is partitioned by multiple
226*4882a593Smuzhiyun * vGPUs in different VMs. ::
227*4882a593Smuzhiyun *
228*4882a593Smuzhiyun * vGPU1 view Host view
229*4882a593Smuzhiyun * 0 ------> +-----------+ +-----------+
230*4882a593Smuzhiyun * ^ |###########| | vGPU3 |
231*4882a593Smuzhiyun * | |###########| +-----------+
232*4882a593Smuzhiyun * | |###########| | vGPU2 |
233*4882a593Smuzhiyun * | +-----------+ +-----------+
234*4882a593Smuzhiyun * mappable GM | available | ==> | vGPU1 |
235*4882a593Smuzhiyun * | +-----------+ +-----------+
236*4882a593Smuzhiyun * | |###########| | |
237*4882a593Smuzhiyun * v |###########| | Host |
238*4882a593Smuzhiyun * +=======+===========+ +===========+
239*4882a593Smuzhiyun * ^ |###########| | vGPU3 |
240*4882a593Smuzhiyun * | |###########| +-----------+
241*4882a593Smuzhiyun * | |###########| | vGPU2 |
242*4882a593Smuzhiyun * | +-----------+ +-----------+
243*4882a593Smuzhiyun * unmappable GM | available | ==> | vGPU1 |
244*4882a593Smuzhiyun * | +-----------+ +-----------+
245*4882a593Smuzhiyun * | |###########| | |
246*4882a593Smuzhiyun * | |###########| | Host |
247*4882a593Smuzhiyun * v |###########| | |
248*4882a593Smuzhiyun * total GM size ------> +-----------+ +-----------+
249*4882a593Smuzhiyun *
250*4882a593Smuzhiyun * Returns:
251*4882a593Smuzhiyun * zero on success, non-zero if configuration invalid or ballooning failed
252*4882a593Smuzhiyun */
intel_vgt_balloon(struct i915_ggtt * ggtt)253*4882a593Smuzhiyun int intel_vgt_balloon(struct i915_ggtt *ggtt)
254*4882a593Smuzhiyun {
255*4882a593Smuzhiyun struct drm_i915_private *dev_priv = ggtt->vm.i915;
256*4882a593Smuzhiyun struct intel_uncore *uncore = &dev_priv->uncore;
257*4882a593Smuzhiyun unsigned long ggtt_end = ggtt->vm.total;
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun unsigned long mappable_base, mappable_size, mappable_end;
260*4882a593Smuzhiyun unsigned long unmappable_base, unmappable_size, unmappable_end;
261*4882a593Smuzhiyun int ret;
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun if (!intel_vgpu_active(ggtt->vm.i915))
264*4882a593Smuzhiyun return 0;
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun mappable_base =
267*4882a593Smuzhiyun intel_uncore_read(uncore, vgtif_reg(avail_rs.mappable_gmadr.base));
268*4882a593Smuzhiyun mappable_size =
269*4882a593Smuzhiyun intel_uncore_read(uncore, vgtif_reg(avail_rs.mappable_gmadr.size));
270*4882a593Smuzhiyun unmappable_base =
271*4882a593Smuzhiyun intel_uncore_read(uncore, vgtif_reg(avail_rs.nonmappable_gmadr.base));
272*4882a593Smuzhiyun unmappable_size =
273*4882a593Smuzhiyun intel_uncore_read(uncore, vgtif_reg(avail_rs.nonmappable_gmadr.size));
274*4882a593Smuzhiyun
275*4882a593Smuzhiyun mappable_end = mappable_base + mappable_size;
276*4882a593Smuzhiyun unmappable_end = unmappable_base + unmappable_size;
277*4882a593Smuzhiyun
278*4882a593Smuzhiyun drm_info(&dev_priv->drm, "VGT ballooning configuration:\n");
279*4882a593Smuzhiyun drm_info(&dev_priv->drm,
280*4882a593Smuzhiyun "Mappable graphic memory: base 0x%lx size %ldKiB\n",
281*4882a593Smuzhiyun mappable_base, mappable_size / 1024);
282*4882a593Smuzhiyun drm_info(&dev_priv->drm,
283*4882a593Smuzhiyun "Unmappable graphic memory: base 0x%lx size %ldKiB\n",
284*4882a593Smuzhiyun unmappable_base, unmappable_size / 1024);
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun if (mappable_end > ggtt->mappable_end ||
287*4882a593Smuzhiyun unmappable_base < ggtt->mappable_end ||
288*4882a593Smuzhiyun unmappable_end > ggtt_end) {
289*4882a593Smuzhiyun drm_err(&dev_priv->drm, "Invalid ballooning configuration!\n");
290*4882a593Smuzhiyun return -EINVAL;
291*4882a593Smuzhiyun }
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun /* Unmappable graphic memory ballooning */
294*4882a593Smuzhiyun if (unmappable_base > ggtt->mappable_end) {
295*4882a593Smuzhiyun ret = vgt_balloon_space(ggtt, &bl_info.space[2],
296*4882a593Smuzhiyun ggtt->mappable_end, unmappable_base);
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun if (ret)
299*4882a593Smuzhiyun goto err;
300*4882a593Smuzhiyun }
301*4882a593Smuzhiyun
302*4882a593Smuzhiyun if (unmappable_end < ggtt_end) {
303*4882a593Smuzhiyun ret = vgt_balloon_space(ggtt, &bl_info.space[3],
304*4882a593Smuzhiyun unmappable_end, ggtt_end);
305*4882a593Smuzhiyun if (ret)
306*4882a593Smuzhiyun goto err_upon_mappable;
307*4882a593Smuzhiyun }
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun /* Mappable graphic memory ballooning */
310*4882a593Smuzhiyun if (mappable_base) {
311*4882a593Smuzhiyun ret = vgt_balloon_space(ggtt, &bl_info.space[0],
312*4882a593Smuzhiyun 0, mappable_base);
313*4882a593Smuzhiyun
314*4882a593Smuzhiyun if (ret)
315*4882a593Smuzhiyun goto err_upon_unmappable;
316*4882a593Smuzhiyun }
317*4882a593Smuzhiyun
318*4882a593Smuzhiyun if (mappable_end < ggtt->mappable_end) {
319*4882a593Smuzhiyun ret = vgt_balloon_space(ggtt, &bl_info.space[1],
320*4882a593Smuzhiyun mappable_end, ggtt->mappable_end);
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun if (ret)
323*4882a593Smuzhiyun goto err_below_mappable;
324*4882a593Smuzhiyun }
325*4882a593Smuzhiyun
326*4882a593Smuzhiyun drm_info(&dev_priv->drm, "VGT balloon successfully\n");
327*4882a593Smuzhiyun return 0;
328*4882a593Smuzhiyun
329*4882a593Smuzhiyun err_below_mappable:
330*4882a593Smuzhiyun vgt_deballoon_space(ggtt, &bl_info.space[0]);
331*4882a593Smuzhiyun err_upon_unmappable:
332*4882a593Smuzhiyun vgt_deballoon_space(ggtt, &bl_info.space[3]);
333*4882a593Smuzhiyun err_upon_mappable:
334*4882a593Smuzhiyun vgt_deballoon_space(ggtt, &bl_info.space[2]);
335*4882a593Smuzhiyun err:
336*4882a593Smuzhiyun drm_err(&dev_priv->drm, "VGT balloon fail\n");
337*4882a593Smuzhiyun return ret;
338*4882a593Smuzhiyun }
339