// SPDX-License-Identifier: GPL-2.0-only
/*
 * This file implements the DMA operations for NVLink devices. The NPU
 * devices all point to the same iommu table as the parent PCI device.
 *
 * Copyright Alistair Popple, IBM Corporation 2015.
 */

#include <linux/mmu_notifier.h>
#include <linux/mmu_context.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/memblock.h>
#include <linux/sizes.h>

#include <asm/debugfs.h>
#include <asm/powernv.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>

#include "pci.h"

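/*
 * Look up the pci_dev behind a device-tree node. Note that the returned
 * device is *not* reference counted (see the comment in the body), so it
 * is only safe to use while the PE holds its own reference to the device.
 */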
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
	struct pci_dn *pdn = PCI_DN(dn);
	struct pci_dev *pdev;

	pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
					   pdn->busno, pdn->devfn);

	/*
	 * pci_get_domain_bus_and_slot() increased the reference count of
	 * the PCI device, but callers don't actually need that reference
	 * as the PE already holds one. Since callers aren't aware of the
	 * reference count change, call pci_dev_put() now to avoid leaks.
	 */
	if (pdev)
		pci_dev_put(pdev);

	return pdev;
}

/* Given an NPU device get the associated PCI device. */
struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
{
	struct device_node *dn;
	struct pci_dev *gpdev;

	if (WARN_ON(!npdev))
		return NULL;

	if (WARN_ON(!npdev->dev.of_node))
		return NULL;

	/* Get associated PCI device */
	dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
	if (!dn)
		return NULL;

	gpdev = get_pci_dev(dn);
	of_node_put(dn);

	return gpdev;
}
EXPORT_SYMBOL(pnv_pci_get_gpu_dev);

/* Given the real PCI device get a linked NPU device. */
struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
{
	struct device_node *dn;
	struct pci_dev *npdev;

	if (WARN_ON(!gpdev))
		return NULL;

	/* Not all PCI devices have device-tree nodes */
	if (!gpdev->dev.of_node)
		return NULL;

	/* Get associated NPU device */
	dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
	if (!dn)
		return NULL;

	npdev = get_pci_dev(dn);
	of_node_put(dn);

	return npdev;
}
EXPORT_SYMBOL(pnv_pci_get_npu_dev);

#ifdef CONFIG_IOMMU_API
/*
 * Returns the PE associated with the PCI device of the given
 * NPU. Returns the linked pci device if pci_dev != NULL.
 */
static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
						  struct pci_dev **gpdev)
{
	struct pnv_phb *phb;
	struct pci_controller *hose;
	struct pci_dev *pdev;
	struct pnv_ioda_pe *pe;
	struct pci_dn *pdn;

	pdev = pnv_pci_get_gpu_dev(npe->pdev);
	if (!pdev)
		return NULL;

	pdn = pci_get_pdn(pdev);
	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return NULL;

	hose = pci_bus_to_host(pdev->bus);
	phb = hose->private_data;
	pe = &phb->ioda.pe_array[pdn->pe_number];

	if (gpdev)
		*gpdev = pdev;

	return pe;
}

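/*
 * pnv_npu_set_window() may need to remove the window in the other slot
 * first (the NPU has a single TVE), hence this forward declaration.
 */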
static long pnv_npu_unset_window(struct iommu_table_group *table_group,
				 int num);

static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
			       struct iommu_table *tbl)
{
	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
					       table_group);
	struct pnv_phb *phb = npe->phb;
	int64_t rc;
	const unsigned long size = tbl->it_indirect_levels ?
		tbl->it_level_size : tbl->it_size;
	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
	const __u64 win_size = tbl->it_size << tbl->it_page_shift;
	int num2 = (num == 0) ? 1 : 0;

	/* NPU has just one TVE so if there is another table, remove it first */
	if (npe->table_group.tables[num2])
		pnv_npu_unset_window(&npe->table_group, num2);

	pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
		start_addr, start_addr + win_size - 1,
		IOMMU_PAGE_SIZE(tbl));

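	/* TCEs are 8 bytes each, so "size << 3" is the table size in bytes. */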
	rc = opal_pci_map_pe_dma_window(phb->opal_id,
			npe->pe_number,
			npe->pe_number,
			tbl->it_indirect_levels + 1,
			__pa(tbl->it_base),
			size << 3,
			IOMMU_PAGE_SIZE(tbl));
	if (rc) {
		pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
		return rc;
	}
	pnv_pci_ioda2_tce_invalidate_entire(phb, false);

	/* Add the table to the list so its TCE cache will get invalidated */
	pnv_pci_link_table_and_group(phb->hose->node, num,
			tbl, &npe->table_group);

	return 0;
}

static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
{
	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
					       table_group);
	struct pnv_phb *phb = npe->phb;
	int64_t rc;

	if (!npe->table_group.tables[num])
		return 0;

	pe_info(npe, "Removing DMA window\n");

	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
			npe->pe_number,
			0/* levels */, 0/* table address */,
			0/* table size */, 0/* page size */);
	if (rc) {
		pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
		return rc;
	}
	pnv_pci_ioda2_tce_invalidate_entire(phb, false);

	pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
			&npe->table_group);

	return 0;
}

/* Switch ownership from platform code to external user (e.g. VFIO) */
static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
{
	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
					       table_group);
	struct pnv_phb *phb = npe->phb;
	int64_t rc;
	struct pci_dev *gpdev = NULL;

	/*
	 * Note: NPU has just a single TVE in the hardware which means that
	 * while used by the kernel, it can have either a 32bit window or
	 * DMA bypass but never both. So we deconfigure the 32bit window
	 * only if it was enabled at the moment of the ownership change.
	 */
	if (npe->table_group.tables[0]) {
		pnv_npu_unset_window(&npe->table_group, 0);
		return;
	}

	/* Disable bypass */
	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
			npe->pe_number, npe->pe_number,
			0 /* bypass base */, 0);
	if (rc) {
		pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
		return;
	}
	pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);

	get_gpu_pci_dev_and_pe(npe, &gpdev);
	if (gpdev)
		pnv_npu2_unmap_lpar_dev(gpdev);
}

static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
{
	struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
					       table_group);
	struct pci_dev *gpdev = NULL;

	get_gpu_pci_dev_and_pe(npe, &gpdev);
	if (gpdev)
		pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
}

static struct iommu_table_group_ops pnv_pci_npu_ops = {
	.set_window = pnv_npu_set_window,
	.unset_window = pnv_npu_unset_window,
	.take_ownership = pnv_npu_take_ownership,
	.release_ownership = pnv_npu_release_ownership,
};
#endif /* CONFIG_IOMMU_API */

/*
 * NPU2 ATS
 */
/* Maximum possible number of ATSD MMIO registers per NPU */
#define NV_NMMU_ATSD_REGS	8
#define NV_NPU_MAX_PE_NUM	16
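/*
 * Sizing note (assumption, not derived from hardware docs): the comment
 * below gives a worst case of 3 x (1 GPU + 2 NPU links) = 9 PEs per
 * compound group on POWER9, so 16 leaves some headroom.
 */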

/*
 * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
 * up to 3 x (GPU + 2xNPUs) (POWER9).
 */
struct npu_comp {
	struct iommu_table_group table_group;
	int pe_num;
	struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
};

/* An NPU descriptor, valid for POWER9 only */
struct npu {
	int index;
	struct npu_comp npucomp;
};

#ifdef CONFIG_IOMMU_API
static long pnv_npu_peers_create_table_userspace(
		struct iommu_table_group *table_group,
		int num, __u32 page_shift, __u64 window_size, __u32 levels,
		struct iommu_table **ptbl)
{
	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
						table_group);

	if (!npucomp->pe_num || !npucomp->pe[0] ||
			!npucomp->pe[0]->table_group.ops ||
			!npucomp->pe[0]->table_group.ops->create_table)
		return -EFAULT;

	return npucomp->pe[0]->table_group.ops->create_table(
			&npucomp->pe[0]->table_group, num, page_shift,
			window_size, levels, ptbl);
}

static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
		int num, struct iommu_table *tbl)
{
	int i, j;
	long ret = 0;
	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
						table_group);

	for (i = 0; i < npucomp->pe_num; ++i) {
		struct pnv_ioda_pe *pe = npucomp->pe[i];

		if (!pe->table_group.ops->set_window)
			continue;

		ret = pe->table_group.ops->set_window(&pe->table_group,
				num, tbl);
		if (ret)
			break;
	}

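	/* On failure, unwind the windows already set on the preceding PEs. */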
	if (ret) {
		for (j = 0; j < i; ++j) {
			struct pnv_ioda_pe *pe = npucomp->pe[j];

			if (!pe->table_group.ops->unset_window)
				continue;

			ret = pe->table_group.ops->unset_window(
					&pe->table_group, num);
			if (ret)
				break;
		}
	} else {
		table_group->tables[num] = iommu_tce_table_get(tbl);
	}

	return ret;
}

static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
		int num)
{
	int i, j;
	long ret = 0;
	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
						table_group);

	for (i = 0; i < npucomp->pe_num; ++i) {
		struct pnv_ioda_pe *pe = npucomp->pe[i];

		WARN_ON(npucomp->table_group.tables[num] !=
				table_group->tables[num]);
		if (!npucomp->table_group.tables[num])
			continue;

		if (!pe->table_group.ops->unset_window)
			continue;

		ret = pe->table_group.ops->unset_window(&pe->table_group, num);
		if (ret)
			break;
	}

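	/* On failure, re-establish the windows already removed above. */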
	if (ret) {
		for (j = 0; j < i; ++j) {
			struct pnv_ioda_pe *pe = npucomp->pe[j];

			if (!npucomp->table_group.tables[num])
				continue;

			if (!pe->table_group.ops->set_window)
				continue;

			ret = pe->table_group.ops->set_window(&pe->table_group,
					num, table_group->tables[num]);
			if (ret)
				break;
		}
	} else if (table_group->tables[num]) {
		iommu_tce_table_put(table_group->tables[num]);
		table_group->tables[num] = NULL;
	}

	return ret;
}

static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
{
	int i;
	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
						table_group);

	for (i = 0; i < npucomp->pe_num; ++i) {
		struct pnv_ioda_pe *pe = npucomp->pe[i];

		if (!pe->table_group.ops ||
		    !pe->table_group.ops->take_ownership)
			continue;
		pe->table_group.ops->take_ownership(&pe->table_group);
	}
}

static void pnv_npu_peers_release_ownership(
		struct iommu_table_group *table_group)
{
	int i;
	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
						table_group);

	for (i = 0; i < npucomp->pe_num; ++i) {
		struct pnv_ioda_pe *pe = npucomp->pe[i];

		if (!pe->table_group.ops ||
		    !pe->table_group.ops->release_ownership)
			continue;
		pe->table_group.ops->release_ownership(&pe->table_group);
	}
}

static struct iommu_table_group_ops pnv_npu_peers_ops = {
	.get_table_size = pnv_pci_ioda2_get_table_size,
	.create_table = pnv_npu_peers_create_table_userspace,
	.set_window = pnv_npu_peers_set_window,
	.unset_window = pnv_npu_peers_unset_window,
	.take_ownership = pnv_npu_peers_take_ownership,
	.release_ownership = pnv_npu_peers_release_ownership,
};

static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
		struct pnv_ioda_pe *pe)
{
	if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
		return;

	npucomp->pe[npucomp->pe_num] = pe;
	++npucomp->pe_num;
}

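/*
 * Find (POWER9) or create (POWER8) the compound IOMMU group for a GPU PE
 * and move the GPU device out of the PE's own group and into it.
 */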
static struct iommu_table_group *
pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
{
	struct iommu_table_group *compound_group;
	struct npu_comp *npucomp;
	struct pci_dev *gpdev = NULL;
	struct pci_controller *hose;
	struct pci_dev *npdev = NULL;

	list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
		npdev = pnv_pci_get_npu_dev(gpdev, 0);
		if (npdev)
			break;
	}

	if (!npdev)
		/* It is not an NPU attached device, skip */
		return NULL;

	hose = pci_bus_to_host(npdev->bus);

	if (hose->npu) {
		/* P9 case: compound group is per-NPU (all gpus, all links) */
		npucomp = &hose->npu->npucomp;
	} else {
		/* P8 case: compound group is per-GPU (1 gpu, 2 links) */
		npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
		if (!npucomp)
			return NULL;
	}

	compound_group = &npucomp->table_group;
	if (!compound_group->group) {
		compound_group->ops = &pnv_npu_peers_ops;
		iommu_register_group(compound_group, hose->global_number,
				pe->pe_number);

		/* Steal capabilities from a GPU PE */
		compound_group->max_dynamic_windows_supported =
			pe->table_group.max_dynamic_windows_supported;
		compound_group->tce32_start = pe->table_group.tce32_start;
		compound_group->tce32_size = pe->table_group.tce32_size;
		compound_group->max_levels = pe->table_group.max_levels;
		if (!compound_group->pgsizes)
			compound_group->pgsizes = pe->table_group.pgsizes;
	}

	/*
	 * The GPU would have been added to the iommu group that's created
	 * for the PE. Pull it out now.
	 */
	iommu_del_device(&gpdev->dev);

	/*
	 * I'm not sure this is strictly required, but it's probably a good idea
	 * since the table_group for the PE is going to be attached to the
	 * compound table group. If we leave the PE's iommu group active then
	 * we might have the same table_group being modifiable via two separate
	 * iommu groups.
	 */
	iommu_group_put(pe->table_group.group);

	/* Now put the GPU into the compound group */
	pnv_comp_attach_table_group(npucomp, pe);
	iommu_add_device(compound_group, &gpdev->dev);

	return compound_group;
}

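/*
 * Attach an NVLink bridge PE to the compound group of its GPU and add the
 * bridge's PCI device(s) to that IOMMU group.
 */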
static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
{
	struct iommu_table_group *table_group;
	struct npu_comp *npucomp;
	struct pci_dev *gpdev = NULL;
	struct pci_dev *npdev;
	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);

	WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
	if (!gpe)
		return NULL;

	/*
	 * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
	 * but NPU bridges do not have this hook defined so we do it here.
	 * We do not set up other table group parameters as they won't be used
	 * anyway - NVLink bridges are subordinate PEs.
	 */
	pe->table_group.ops = &pnv_pci_npu_ops;

	table_group = iommu_group_get_iommudata(
			iommu_group_get(&gpdev->dev));

	/*
	 * On P9 the NPU PHB and the PCI PHB support different page sizes,
	 * so keep only the matching ones. We expect here that the NVLink
	 * bridge PE pgsizes have been initialized by the caller.
	 */
	table_group->pgsizes &= pe->table_group.pgsizes;
	npucomp = container_of(table_group, struct npu_comp, table_group);
	pnv_comp_attach_table_group(npucomp, pe);

	list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
		struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);

		if (gpdevtmp != gpdev)
			continue;

		iommu_add_device(table_group, &npdev->dev);
	}

	return table_group;
}

void pnv_pci_npu_setup_iommu_groups(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	/*
	 * For non-nvlink devices the IOMMU group is registered when the PE is
	 * configured and devices are added to the group when the per-device
	 * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
	 * only initialised for "normal" IODA PHBs.
	 *
	 * For NVLink devices we need to ensure the NVLinks and the GPU end up
	 * in the same IOMMU group, so that's handled here.
	 */
	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		if (phb->type == PNV_PHB_IODA2)
			list_for_each_entry(pe, &phb->ioda.pe_list, list)
				pnv_try_setup_npu_table_group(pe);
	}

	/*
	 * Now we have all PHBs discovered, time to add NPU devices to
	 * the corresponding IOMMU groups.
	 */
	list_for_each_entry(hose, &hose_list, list_node) {
		unsigned long pgsizes;

		phb = hose->private_data;

		if (phb->type != PNV_PHB_NPU_NVLINK)
			continue;

		pgsizes = pnv_ioda_parse_tce_sizes(phb);
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			/*
			 * IODA2 bridges get this set up from
			 * pci_controller_ops::setup_bridge but NPU bridges
			 * do not have this hook defined so we do it here.
			 */
			pe->table_group.pgsizes = pgsizes;
			pnv_npu_compound_attach(pe);
		}
	}
}
#endif /* CONFIG_IOMMU_API */

int pnv_npu2_init(struct pci_controller *hose)
{
	static int npu_index;
	struct npu *npu;
	int ret;

	npu = kzalloc(sizeof(*npu), GFP_KERNEL);
	if (!npu)
		return -ENOMEM;

	npu_index++;
	if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
		ret = -ENOSPC;
		goto fail_exit;
	}
	npu->index = npu_index;
	hose->npu = npu;

	return 0;

fail_exit:
	kfree(npu);
	return ret;
}

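/*
 * Bind a GPU (and the NPU context behind it) to an LPAR via OPAL.
 * lparid 0 is used when the host itself owns the device, see
 * pnv_npu2_map_lpar() and pnv_npu_release_ownership().
 */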
int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
		unsigned long msr)
{
	int ret;
	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
	struct pci_controller *hose;
	struct pnv_phb *nphb;

	if (!npdev)
		return -ENODEV;

	hose = pci_bus_to_host(npdev->bus);
	if (hose->npu == NULL) {
		dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
		return 0;
	}

	nphb = hose->private_data;

	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
			nphb->opal_id, lparid);
	/*
	 * Currently we only support radix, and a non-zero LPCR only makes
	 * sense for hash tables, so skiboot expects the LPCR parameter to
	 * be zero.
	 */
	ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
			0 /* LPCR bits */);
	if (ret) {
		dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
		return ret;
	}

	dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
			nphb->opal_id, msr);
	ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
			pci_dev_id(gpdev));
	if (ret < 0)
		dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
	else
		ret = 0;

	return ret;
}
EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);

void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
{
	struct pci_dev *gpdev;

	list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
		pnv_npu2_map_lpar_dev(gpdev, 0, msr);
}

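/* Tear down the device's NPU context and reset its LPAR mapping to 0. */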
int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
{
	int ret;
	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
	struct pci_controller *hose;
	struct pnv_phb *nphb;

	if (!npdev)
		return -ENODEV;

	hose = pci_bus_to_host(npdev->bus);
	if (hose->npu == NULL) {
		dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
		return 0;
	}

	nphb = hose->private_data;

	dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
			nphb->opal_id);
	ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
			pci_dev_id(gpdev));
	if (ret < 0) {
		dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
		return ret;
	}

	/* Set LPID to 0 anyway, just to be safe */
	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
	ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
			0 /* LPCR bits */);
	if (ret)
		dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);

	return ret;
}
EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);