xref: /OK3568_Linux_fs/kernel/drivers/infiniband/hw/qib/qib_file_ops.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun  * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
3*4882a593Smuzhiyun  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4*4882a593Smuzhiyun  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5*4882a593Smuzhiyun  *
6*4882a593Smuzhiyun  * This software is available to you under a choice of one of two
7*4882a593Smuzhiyun  * licenses.  You may choose to be licensed under the terms of the GNU
8*4882a593Smuzhiyun  * General Public License (GPL) Version 2, available from the file
9*4882a593Smuzhiyun  * COPYING in the main directory of this source tree, or the
10*4882a593Smuzhiyun  * OpenIB.org BSD license below:
11*4882a593Smuzhiyun  *
12*4882a593Smuzhiyun  *     Redistribution and use in source and binary forms, with or
13*4882a593Smuzhiyun  *     without modification, are permitted provided that the following
14*4882a593Smuzhiyun  *     conditions are met:
15*4882a593Smuzhiyun  *
16*4882a593Smuzhiyun  *      - Redistributions of source code must retain the above
17*4882a593Smuzhiyun  *        copyright notice, this list of conditions and the following
18*4882a593Smuzhiyun  *        disclaimer.
19*4882a593Smuzhiyun  *
20*4882a593Smuzhiyun  *      - Redistributions in binary form must reproduce the above
21*4882a593Smuzhiyun  *        copyright notice, this list of conditions and the following
22*4882a593Smuzhiyun  *        disclaimer in the documentation and/or other materials
23*4882a593Smuzhiyun  *        provided with the distribution.
24*4882a593Smuzhiyun  *
25*4882a593Smuzhiyun  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26*4882a593Smuzhiyun  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27*4882a593Smuzhiyun  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28*4882a593Smuzhiyun  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29*4882a593Smuzhiyun  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30*4882a593Smuzhiyun  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31*4882a593Smuzhiyun  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32*4882a593Smuzhiyun  * SOFTWARE.
33*4882a593Smuzhiyun  */
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun #include <linux/pci.h>
36*4882a593Smuzhiyun #include <linux/poll.h>
37*4882a593Smuzhiyun #include <linux/cdev.h>
38*4882a593Smuzhiyun #include <linux/swap.h>
39*4882a593Smuzhiyun #include <linux/vmalloc.h>
40*4882a593Smuzhiyun #include <linux/highmem.h>
41*4882a593Smuzhiyun #include <linux/io.h>
42*4882a593Smuzhiyun #include <linux/jiffies.h>
43*4882a593Smuzhiyun #include <linux/delay.h>
44*4882a593Smuzhiyun #include <linux/export.h>
45*4882a593Smuzhiyun #include <linux/uio.h>
46*4882a593Smuzhiyun #include <linux/pgtable.h>
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun #include <rdma/ib.h>
49*4882a593Smuzhiyun 
50*4882a593Smuzhiyun #include "qib.h"
51*4882a593Smuzhiyun #include "qib_common.h"
52*4882a593Smuzhiyun #include "qib_user_sdma.h"
53*4882a593Smuzhiyun 
54*4882a593Smuzhiyun #undef pr_fmt
55*4882a593Smuzhiyun #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
56*4882a593Smuzhiyun 
57*4882a593Smuzhiyun static int qib_open(struct inode *, struct file *);
58*4882a593Smuzhiyun static int qib_close(struct inode *, struct file *);
59*4882a593Smuzhiyun static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *);
60*4882a593Smuzhiyun static ssize_t qib_write_iter(struct kiocb *, struct iov_iter *);
61*4882a593Smuzhiyun static __poll_t qib_poll(struct file *, struct poll_table_struct *);
62*4882a593Smuzhiyun static int qib_mmapf(struct file *, struct vm_area_struct *);
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun /*
65*4882a593Smuzhiyun  * This is really, really weird shit - write() and writev() here
66*4882a593Smuzhiyun  * have completely unrelated semantics.  Sucky userland ABI,
67*4882a593Smuzhiyun  * film at 11.
68*4882a593Smuzhiyun  */
69*4882a593Smuzhiyun static const struct file_operations qib_file_ops = {
70*4882a593Smuzhiyun 	.owner = THIS_MODULE,
71*4882a593Smuzhiyun 	.write = qib_write,
72*4882a593Smuzhiyun 	.write_iter = qib_write_iter,
73*4882a593Smuzhiyun 	.open = qib_open,
74*4882a593Smuzhiyun 	.release = qib_close,
75*4882a593Smuzhiyun 	.poll = qib_poll,
76*4882a593Smuzhiyun 	.mmap = qib_mmapf,
77*4882a593Smuzhiyun 	.llseek = noop_llseek,
78*4882a593Smuzhiyun };
79*4882a593Smuzhiyun 
80*4882a593Smuzhiyun /*
81*4882a593Smuzhiyun  * Convert kernel virtual addresses to physical addresses so they don't
82*4882a593Smuzhiyun  * potentially conflict with the chip addresses used as mmap offsets.
83*4882a593Smuzhiyun  * It doesn't really matter what mmap offset we use as long as we can
84*4882a593Smuzhiyun  * interpret it correctly.
85*4882a593Smuzhiyun  */
cvt_kvaddr(void * p)86*4882a593Smuzhiyun static u64 cvt_kvaddr(void *p)
87*4882a593Smuzhiyun {
88*4882a593Smuzhiyun 	struct page *page;
89*4882a593Smuzhiyun 	u64 paddr = 0;
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 	page = vmalloc_to_page(p);
92*4882a593Smuzhiyun 	if (page)
93*4882a593Smuzhiyun 		paddr = page_to_pfn(page) << PAGE_SHIFT;
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	return paddr;
96*4882a593Smuzhiyun }
97*4882a593Smuzhiyun 
qib_get_base_info(struct file * fp,void __user * ubase,size_t ubase_size)98*4882a593Smuzhiyun static int qib_get_base_info(struct file *fp, void __user *ubase,
99*4882a593Smuzhiyun 			     size_t ubase_size)
100*4882a593Smuzhiyun {
101*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd = ctxt_fp(fp);
102*4882a593Smuzhiyun 	int ret = 0;
103*4882a593Smuzhiyun 	struct qib_base_info *kinfo = NULL;
104*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
105*4882a593Smuzhiyun 	struct qib_pportdata *ppd = rcd->ppd;
106*4882a593Smuzhiyun 	unsigned subctxt_cnt;
107*4882a593Smuzhiyun 	int shared, master;
108*4882a593Smuzhiyun 	size_t sz;
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	subctxt_cnt = rcd->subctxt_cnt;
111*4882a593Smuzhiyun 	if (!subctxt_cnt) {
112*4882a593Smuzhiyun 		shared = 0;
113*4882a593Smuzhiyun 		master = 0;
114*4882a593Smuzhiyun 		subctxt_cnt = 1;
115*4882a593Smuzhiyun 	} else {
116*4882a593Smuzhiyun 		shared = 1;
117*4882a593Smuzhiyun 		master = !subctxt_fp(fp);
118*4882a593Smuzhiyun 	}
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	sz = sizeof(*kinfo);
121*4882a593Smuzhiyun 	/* If context sharing is not requested, allow the old size structure */
122*4882a593Smuzhiyun 	if (!shared)
123*4882a593Smuzhiyun 		sz -= 7 * sizeof(u64);
124*4882a593Smuzhiyun 	if (ubase_size < sz) {
125*4882a593Smuzhiyun 		ret = -EINVAL;
126*4882a593Smuzhiyun 		goto bail;
127*4882a593Smuzhiyun 	}
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun 	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
130*4882a593Smuzhiyun 	if (kinfo == NULL) {
131*4882a593Smuzhiyun 		ret = -ENOMEM;
132*4882a593Smuzhiyun 		goto bail;
133*4882a593Smuzhiyun 	}
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun 	ret = dd->f_get_base_info(rcd, kinfo);
136*4882a593Smuzhiyun 	if (ret < 0)
137*4882a593Smuzhiyun 		goto bail;
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 	kinfo->spi_rcvhdr_cnt = dd->rcvhdrcnt;
140*4882a593Smuzhiyun 	kinfo->spi_rcvhdrent_size = dd->rcvhdrentsize;
141*4882a593Smuzhiyun 	kinfo->spi_tidegrcnt = rcd->rcvegrcnt;
142*4882a593Smuzhiyun 	kinfo->spi_rcv_egrbufsize = dd->rcvegrbufsize;
143*4882a593Smuzhiyun 	/*
144*4882a593Smuzhiyun 	 * have to mmap whole thing
145*4882a593Smuzhiyun 	 */
146*4882a593Smuzhiyun 	kinfo->spi_rcv_egrbuftotlen =
147*4882a593Smuzhiyun 		rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size;
148*4882a593Smuzhiyun 	kinfo->spi_rcv_egrperchunk = rcd->rcvegrbufs_perchunk;
149*4882a593Smuzhiyun 	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
150*4882a593Smuzhiyun 		rcd->rcvegrbuf_chunks;
151*4882a593Smuzhiyun 	kinfo->spi_tidcnt = dd->rcvtidcnt / subctxt_cnt;
152*4882a593Smuzhiyun 	if (master)
153*4882a593Smuzhiyun 		kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt;
154*4882a593Smuzhiyun 	/*
155*4882a593Smuzhiyun 	 * for this use, may be cfgctxts summed over all chips that
156*4882a593Smuzhiyun 	 * are are configured and present
157*4882a593Smuzhiyun 	 */
158*4882a593Smuzhiyun 	kinfo->spi_nctxts = dd->cfgctxts;
159*4882a593Smuzhiyun 	/* unit (chip/board) our context is on */
160*4882a593Smuzhiyun 	kinfo->spi_unit = dd->unit;
161*4882a593Smuzhiyun 	kinfo->spi_port = ppd->port;
162*4882a593Smuzhiyun 	/* for now, only a single page */
163*4882a593Smuzhiyun 	kinfo->spi_tid_maxsize = PAGE_SIZE;
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun 	/*
166*4882a593Smuzhiyun 	 * Doing this per context, and based on the skip value, etc.  This has
167*4882a593Smuzhiyun 	 * to be the actual buffer size, since the protocol code treats it
168*4882a593Smuzhiyun 	 * as an array.
169*4882a593Smuzhiyun 	 *
170*4882a593Smuzhiyun 	 * These have to be set to user addresses in the user code via mmap.
171*4882a593Smuzhiyun 	 * These values are used on return to user code for the mmap target
172*4882a593Smuzhiyun 	 * addresses only.  For 32 bit, same 44 bit address problem, so use
173*4882a593Smuzhiyun 	 * the physical address, not virtual.  Before 2.6.11, using the
174*4882a593Smuzhiyun 	 * page_address() macro worked, but in 2.6.11, even that returns the
175*4882a593Smuzhiyun 	 * full 64 bit address (upper bits all 1's).  So far, using the
176*4882a593Smuzhiyun 	 * physical addresses (or chip offsets, for chip mapping) works, but
177*4882a593Smuzhiyun 	 * no doubt some future kernel release will change that, and we'll be
178*4882a593Smuzhiyun 	 * on to yet another method of dealing with this.
179*4882a593Smuzhiyun 	 * Normally only one of rcvhdr_tailaddr or rhf_offset is useful
180*4882a593Smuzhiyun 	 * since the chips with non-zero rhf_offset don't normally
181*4882a593Smuzhiyun 	 * enable tail register updates to host memory, but for testing,
182*4882a593Smuzhiyun 	 * both can be enabled and used.
183*4882a593Smuzhiyun 	 */
184*4882a593Smuzhiyun 	kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys;
185*4882a593Smuzhiyun 	kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys;
186*4882a593Smuzhiyun 	kinfo->spi_rhf_offset = dd->rhf_offset;
187*4882a593Smuzhiyun 	kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys;
188*4882a593Smuzhiyun 	kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys;
189*4882a593Smuzhiyun 	/* setup per-unit (not port) status area for user programs */
190*4882a593Smuzhiyun 	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
191*4882a593Smuzhiyun 		(char *) ppd->statusp -
192*4882a593Smuzhiyun 		(char *) dd->pioavailregs_dma;
193*4882a593Smuzhiyun 	kinfo->spi_uregbase = (u64) dd->uregbase + dd->ureg_align * rcd->ctxt;
194*4882a593Smuzhiyun 	if (!shared) {
195*4882a593Smuzhiyun 		kinfo->spi_piocnt = rcd->piocnt;
196*4882a593Smuzhiyun 		kinfo->spi_piobufbase = (u64) rcd->piobufs;
197*4882a593Smuzhiyun 		kinfo->spi_sendbuf_status = cvt_kvaddr(rcd->user_event_mask);
198*4882a593Smuzhiyun 	} else if (master) {
199*4882a593Smuzhiyun 		kinfo->spi_piocnt = (rcd->piocnt / subctxt_cnt) +
200*4882a593Smuzhiyun 				    (rcd->piocnt % subctxt_cnt);
201*4882a593Smuzhiyun 		/* Master's PIO buffers are after all the slave's */
202*4882a593Smuzhiyun 		kinfo->spi_piobufbase = (u64) rcd->piobufs +
203*4882a593Smuzhiyun 			dd->palign *
204*4882a593Smuzhiyun 			(rcd->piocnt - kinfo->spi_piocnt);
205*4882a593Smuzhiyun 	} else {
206*4882a593Smuzhiyun 		unsigned slave = subctxt_fp(fp) - 1;
207*4882a593Smuzhiyun 
208*4882a593Smuzhiyun 		kinfo->spi_piocnt = rcd->piocnt / subctxt_cnt;
209*4882a593Smuzhiyun 		kinfo->spi_piobufbase = (u64) rcd->piobufs +
210*4882a593Smuzhiyun 			dd->palign * kinfo->spi_piocnt * slave;
211*4882a593Smuzhiyun 	}
212*4882a593Smuzhiyun 
213*4882a593Smuzhiyun 	if (shared) {
214*4882a593Smuzhiyun 		kinfo->spi_sendbuf_status =
215*4882a593Smuzhiyun 			cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]);
216*4882a593Smuzhiyun 		/* only spi_subctxt_* fields should be set in this block! */
217*4882a593Smuzhiyun 		kinfo->spi_subctxt_uregbase = cvt_kvaddr(rcd->subctxt_uregbase);
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun 		kinfo->spi_subctxt_rcvegrbuf =
220*4882a593Smuzhiyun 			cvt_kvaddr(rcd->subctxt_rcvegrbuf);
221*4882a593Smuzhiyun 		kinfo->spi_subctxt_rcvhdr_base =
222*4882a593Smuzhiyun 			cvt_kvaddr(rcd->subctxt_rcvhdr_base);
223*4882a593Smuzhiyun 	}
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 	/*
226*4882a593Smuzhiyun 	 * All user buffers are 2KB buffers.  If we ever support
227*4882a593Smuzhiyun 	 * giving 4KB buffers to user processes, this will need some
228*4882a593Smuzhiyun 	 * work.  Can't use piobufbase directly, because it has
229*4882a593Smuzhiyun 	 * both 2K and 4K buffer base values.
230*4882a593Smuzhiyun 	 */
231*4882a593Smuzhiyun 	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) /
232*4882a593Smuzhiyun 		dd->palign;
233*4882a593Smuzhiyun 	kinfo->spi_pioalign = dd->palign;
234*4882a593Smuzhiyun 	kinfo->spi_qpair = QIB_KD_QP;
235*4882a593Smuzhiyun 	/*
236*4882a593Smuzhiyun 	 * user mode PIO buffers are always 2KB, even when 4KB can
237*4882a593Smuzhiyun 	 * be received, and sent via the kernel; this is ibmaxlen
238*4882a593Smuzhiyun 	 * for 2K MTU.
239*4882a593Smuzhiyun 	 */
240*4882a593Smuzhiyun 	kinfo->spi_piosize = dd->piosize2k - 2 * sizeof(u32);
241*4882a593Smuzhiyun 	kinfo->spi_mtu = ppd->ibmaxlen; /* maxlen, not ibmtu */
242*4882a593Smuzhiyun 	kinfo->spi_ctxt = rcd->ctxt;
243*4882a593Smuzhiyun 	kinfo->spi_subctxt = subctxt_fp(fp);
244*4882a593Smuzhiyun 	kinfo->spi_sw_version = QIB_KERN_SWVERSION;
245*4882a593Smuzhiyun 	kinfo->spi_sw_version |= 1U << 31; /* QLogic-built, not kernel.org */
246*4882a593Smuzhiyun 	kinfo->spi_hw_version = dd->revision;
247*4882a593Smuzhiyun 
248*4882a593Smuzhiyun 	if (master)
249*4882a593Smuzhiyun 		kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER;
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun 	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
252*4882a593Smuzhiyun 	if (copy_to_user(ubase, kinfo, sz))
253*4882a593Smuzhiyun 		ret = -EFAULT;
254*4882a593Smuzhiyun bail:
255*4882a593Smuzhiyun 	kfree(kinfo);
256*4882a593Smuzhiyun 	return ret;
257*4882a593Smuzhiyun }
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun /**
260*4882a593Smuzhiyun  * qib_tid_update - update a context TID
261*4882a593Smuzhiyun  * @rcd: the context
262*4882a593Smuzhiyun  * @fp: the qib device file
263*4882a593Smuzhiyun  * @ti: the TID information
264*4882a593Smuzhiyun  *
265*4882a593Smuzhiyun  * The new implementation as of Oct 2004 is that the driver assigns
266*4882a593Smuzhiyun  * the tid and returns it to the caller.   To reduce search time, we
267*4882a593Smuzhiyun  * keep a cursor for each context, walking the shadow tid array to find
268*4882a593Smuzhiyun  * one that's not in use.
269*4882a593Smuzhiyun  *
270*4882a593Smuzhiyun  * For now, if we can't allocate the full list, we fail, although
271*4882a593Smuzhiyun  * in the long run, we'll allocate as many as we can, and the
272*4882a593Smuzhiyun  * caller will deal with that by trying the remaining pages later.
273*4882a593Smuzhiyun  * That means that when we fail, we have to mark the tids as not in
274*4882a593Smuzhiyun  * use again, in our shadow copy.
275*4882a593Smuzhiyun  *
276*4882a593Smuzhiyun  * It's up to the caller to free the tids when they are done.
277*4882a593Smuzhiyun  * We'll unlock the pages as they free them.
278*4882a593Smuzhiyun  *
279*4882a593Smuzhiyun  * Also, right now we are locking one page at a time, but since
280*4882a593Smuzhiyun  * the intended use of this routine is for a single group of
281*4882a593Smuzhiyun  * virtually contiguous pages, that should change to improve
282*4882a593Smuzhiyun  * performance.
283*4882a593Smuzhiyun  */
qib_tid_update(struct qib_ctxtdata * rcd,struct file * fp,const struct qib_tid_info * ti)284*4882a593Smuzhiyun static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
285*4882a593Smuzhiyun 			  const struct qib_tid_info *ti)
286*4882a593Smuzhiyun {
287*4882a593Smuzhiyun 	int ret = 0, ntids;
288*4882a593Smuzhiyun 	u32 tid, ctxttid, cnt, i, tidcnt, tidoff;
289*4882a593Smuzhiyun 	u16 *tidlist;
290*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
291*4882a593Smuzhiyun 	u64 physaddr;
292*4882a593Smuzhiyun 	unsigned long vaddr;
293*4882a593Smuzhiyun 	u64 __iomem *tidbase;
294*4882a593Smuzhiyun 	unsigned long tidmap[8];
295*4882a593Smuzhiyun 	struct page **pagep = NULL;
296*4882a593Smuzhiyun 	unsigned subctxt = subctxt_fp(fp);
297*4882a593Smuzhiyun 
298*4882a593Smuzhiyun 	if (!dd->pageshadow) {
299*4882a593Smuzhiyun 		ret = -ENOMEM;
300*4882a593Smuzhiyun 		goto done;
301*4882a593Smuzhiyun 	}
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 	cnt = ti->tidcnt;
304*4882a593Smuzhiyun 	if (!cnt) {
305*4882a593Smuzhiyun 		ret = -EFAULT;
306*4882a593Smuzhiyun 		goto done;
307*4882a593Smuzhiyun 	}
308*4882a593Smuzhiyun 	ctxttid = rcd->ctxt * dd->rcvtidcnt;
309*4882a593Smuzhiyun 	if (!rcd->subctxt_cnt) {
310*4882a593Smuzhiyun 		tidcnt = dd->rcvtidcnt;
311*4882a593Smuzhiyun 		tid = rcd->tidcursor;
312*4882a593Smuzhiyun 		tidoff = 0;
313*4882a593Smuzhiyun 	} else if (!subctxt) {
314*4882a593Smuzhiyun 		tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) +
315*4882a593Smuzhiyun 			 (dd->rcvtidcnt % rcd->subctxt_cnt);
316*4882a593Smuzhiyun 		tidoff = dd->rcvtidcnt - tidcnt;
317*4882a593Smuzhiyun 		ctxttid += tidoff;
318*4882a593Smuzhiyun 		tid = tidcursor_fp(fp);
319*4882a593Smuzhiyun 	} else {
320*4882a593Smuzhiyun 		tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt;
321*4882a593Smuzhiyun 		tidoff = tidcnt * (subctxt - 1);
322*4882a593Smuzhiyun 		ctxttid += tidoff;
323*4882a593Smuzhiyun 		tid = tidcursor_fp(fp);
324*4882a593Smuzhiyun 	}
325*4882a593Smuzhiyun 	if (cnt > tidcnt) {
326*4882a593Smuzhiyun 		/* make sure it all fits in tid_pg_list */
327*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
328*4882a593Smuzhiyun 			"Process tried to allocate %u TIDs, only trying max (%u)\n",
329*4882a593Smuzhiyun 			cnt, tidcnt);
330*4882a593Smuzhiyun 		cnt = tidcnt;
331*4882a593Smuzhiyun 	}
332*4882a593Smuzhiyun 	pagep = (struct page **) rcd->tid_pg_list;
333*4882a593Smuzhiyun 	tidlist = (u16 *) &pagep[dd->rcvtidcnt];
334*4882a593Smuzhiyun 	pagep += tidoff;
335*4882a593Smuzhiyun 	tidlist += tidoff;
336*4882a593Smuzhiyun 
337*4882a593Smuzhiyun 	memset(tidmap, 0, sizeof(tidmap));
338*4882a593Smuzhiyun 	/* before decrement; chip actual # */
339*4882a593Smuzhiyun 	ntids = tidcnt;
340*4882a593Smuzhiyun 	tidbase = (u64 __iomem *) (((char __iomem *) dd->kregbase) +
341*4882a593Smuzhiyun 				   dd->rcvtidbase +
342*4882a593Smuzhiyun 				   ctxttid * sizeof(*tidbase));
343*4882a593Smuzhiyun 
344*4882a593Smuzhiyun 	/* virtual address of first page in transfer */
345*4882a593Smuzhiyun 	vaddr = ti->tidvaddr;
346*4882a593Smuzhiyun 	if (!access_ok((void __user *) vaddr,
347*4882a593Smuzhiyun 		       cnt * PAGE_SIZE)) {
348*4882a593Smuzhiyun 		ret = -EFAULT;
349*4882a593Smuzhiyun 		goto done;
350*4882a593Smuzhiyun 	}
351*4882a593Smuzhiyun 	ret = qib_get_user_pages(vaddr, cnt, pagep);
352*4882a593Smuzhiyun 	if (ret) {
353*4882a593Smuzhiyun 		/*
354*4882a593Smuzhiyun 		 * if (ret == -EBUSY)
355*4882a593Smuzhiyun 		 * We can't continue because the pagep array won't be
356*4882a593Smuzhiyun 		 * initialized. This should never happen,
357*4882a593Smuzhiyun 		 * unless perhaps the user has mpin'ed the pages
358*4882a593Smuzhiyun 		 * themselves.
359*4882a593Smuzhiyun 		 */
360*4882a593Smuzhiyun 		qib_devinfo(
361*4882a593Smuzhiyun 			dd->pcidev,
362*4882a593Smuzhiyun 			"Failed to lock addr %p, %u pages: errno %d\n",
363*4882a593Smuzhiyun 			(void *) vaddr, cnt, -ret);
364*4882a593Smuzhiyun 		goto done;
365*4882a593Smuzhiyun 	}
366*4882a593Smuzhiyun 	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
367*4882a593Smuzhiyun 		dma_addr_t daddr;
368*4882a593Smuzhiyun 
369*4882a593Smuzhiyun 		for (; ntids--; tid++) {
370*4882a593Smuzhiyun 			if (tid == tidcnt)
371*4882a593Smuzhiyun 				tid = 0;
372*4882a593Smuzhiyun 			if (!dd->pageshadow[ctxttid + tid])
373*4882a593Smuzhiyun 				break;
374*4882a593Smuzhiyun 		}
375*4882a593Smuzhiyun 		if (ntids < 0) {
376*4882a593Smuzhiyun 			/*
377*4882a593Smuzhiyun 			 * Oops, wrapped all the way through their TIDs,
378*4882a593Smuzhiyun 			 * and didn't have enough free; see comments at
379*4882a593Smuzhiyun 			 * start of routine
380*4882a593Smuzhiyun 			 */
381*4882a593Smuzhiyun 			i--;    /* last tidlist[i] not filled in */
382*4882a593Smuzhiyun 			ret = -ENOMEM;
383*4882a593Smuzhiyun 			break;
384*4882a593Smuzhiyun 		}
385*4882a593Smuzhiyun 		ret = qib_map_page(dd->pcidev, pagep[i], &daddr);
386*4882a593Smuzhiyun 		if (ret)
387*4882a593Smuzhiyun 			break;
388*4882a593Smuzhiyun 
389*4882a593Smuzhiyun 		tidlist[i] = tid + tidoff;
390*4882a593Smuzhiyun 		/* we "know" system pages and TID pages are same size */
391*4882a593Smuzhiyun 		dd->pageshadow[ctxttid + tid] = pagep[i];
392*4882a593Smuzhiyun 		dd->physshadow[ctxttid + tid] = daddr;
393*4882a593Smuzhiyun 		/*
394*4882a593Smuzhiyun 		 * don't need atomic or it's overhead
395*4882a593Smuzhiyun 		 */
396*4882a593Smuzhiyun 		__set_bit(tid, tidmap);
397*4882a593Smuzhiyun 		physaddr = dd->physshadow[ctxttid + tid];
398*4882a593Smuzhiyun 		/* PERFORMANCE: below should almost certainly be cached */
399*4882a593Smuzhiyun 		dd->f_put_tid(dd, &tidbase[tid],
400*4882a593Smuzhiyun 				  RCVHQ_RCV_TYPE_EXPECTED, physaddr);
401*4882a593Smuzhiyun 		/*
402*4882a593Smuzhiyun 		 * don't check this tid in qib_ctxtshadow, since we
403*4882a593Smuzhiyun 		 * just filled it in; start with the next one.
404*4882a593Smuzhiyun 		 */
405*4882a593Smuzhiyun 		tid++;
406*4882a593Smuzhiyun 	}
407*4882a593Smuzhiyun 
408*4882a593Smuzhiyun 	if (ret) {
409*4882a593Smuzhiyun 		u32 limit;
410*4882a593Smuzhiyun cleanup:
411*4882a593Smuzhiyun 		/* jump here if copy out of updated info failed... */
412*4882a593Smuzhiyun 		/* same code that's in qib_free_tid() */
413*4882a593Smuzhiyun 		limit = sizeof(tidmap) * BITS_PER_BYTE;
414*4882a593Smuzhiyun 		if (limit > tidcnt)
415*4882a593Smuzhiyun 			/* just in case size changes in future */
416*4882a593Smuzhiyun 			limit = tidcnt;
417*4882a593Smuzhiyun 		tid = find_first_bit((const unsigned long *)tidmap, limit);
418*4882a593Smuzhiyun 		for (; tid < limit; tid++) {
419*4882a593Smuzhiyun 			if (!test_bit(tid, tidmap))
420*4882a593Smuzhiyun 				continue;
421*4882a593Smuzhiyun 			if (dd->pageshadow[ctxttid + tid]) {
422*4882a593Smuzhiyun 				dma_addr_t phys;
423*4882a593Smuzhiyun 
424*4882a593Smuzhiyun 				phys = dd->physshadow[ctxttid + tid];
425*4882a593Smuzhiyun 				dd->physshadow[ctxttid + tid] = dd->tidinvalid;
426*4882a593Smuzhiyun 				/* PERFORMANCE: below should almost certainly
427*4882a593Smuzhiyun 				 * be cached
428*4882a593Smuzhiyun 				 */
429*4882a593Smuzhiyun 				dd->f_put_tid(dd, &tidbase[tid],
430*4882a593Smuzhiyun 					      RCVHQ_RCV_TYPE_EXPECTED,
431*4882a593Smuzhiyun 					      dd->tidinvalid);
432*4882a593Smuzhiyun 				pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
433*4882a593Smuzhiyun 					       PCI_DMA_FROMDEVICE);
434*4882a593Smuzhiyun 				dd->pageshadow[ctxttid + tid] = NULL;
435*4882a593Smuzhiyun 			}
436*4882a593Smuzhiyun 		}
437*4882a593Smuzhiyun 		qib_release_user_pages(pagep, cnt);
438*4882a593Smuzhiyun 	} else {
439*4882a593Smuzhiyun 		/*
440*4882a593Smuzhiyun 		 * Copy the updated array, with qib_tid's filled in, back
441*4882a593Smuzhiyun 		 * to user.  Since we did the copy in already, this "should
442*4882a593Smuzhiyun 		 * never fail" If it does, we have to clean up...
443*4882a593Smuzhiyun 		 */
444*4882a593Smuzhiyun 		if (copy_to_user((void __user *)
445*4882a593Smuzhiyun 				 (unsigned long) ti->tidlist,
446*4882a593Smuzhiyun 				 tidlist, cnt * sizeof(*tidlist))) {
447*4882a593Smuzhiyun 			ret = -EFAULT;
448*4882a593Smuzhiyun 			goto cleanup;
449*4882a593Smuzhiyun 		}
450*4882a593Smuzhiyun 		if (copy_to_user(u64_to_user_ptr(ti->tidmap),
451*4882a593Smuzhiyun 				 tidmap, sizeof(tidmap))) {
452*4882a593Smuzhiyun 			ret = -EFAULT;
453*4882a593Smuzhiyun 			goto cleanup;
454*4882a593Smuzhiyun 		}
455*4882a593Smuzhiyun 		if (tid == tidcnt)
456*4882a593Smuzhiyun 			tid = 0;
457*4882a593Smuzhiyun 		if (!rcd->subctxt_cnt)
458*4882a593Smuzhiyun 			rcd->tidcursor = tid;
459*4882a593Smuzhiyun 		else
460*4882a593Smuzhiyun 			tidcursor_fp(fp) = tid;
461*4882a593Smuzhiyun 	}
462*4882a593Smuzhiyun 
463*4882a593Smuzhiyun done:
464*4882a593Smuzhiyun 	return ret;
465*4882a593Smuzhiyun }
466*4882a593Smuzhiyun 
467*4882a593Smuzhiyun /**
468*4882a593Smuzhiyun  * qib_tid_free - free a context TID
469*4882a593Smuzhiyun  * @rcd: the context
470*4882a593Smuzhiyun  * @subctxt: the subcontext
471*4882a593Smuzhiyun  * @ti: the TID info
472*4882a593Smuzhiyun  *
473*4882a593Smuzhiyun  * right now we are unlocking one page at a time, but since
474*4882a593Smuzhiyun  * the intended use of this routine is for a single group of
475*4882a593Smuzhiyun  * virtually contiguous pages, that should change to improve
476*4882a593Smuzhiyun  * performance.  We check that the TID is in range for this context
477*4882a593Smuzhiyun  * but otherwise don't check validity; if user has an error and
478*4882a593Smuzhiyun  * frees the wrong tid, it's only their own data that can thereby
479*4882a593Smuzhiyun  * be corrupted.  We do check that the TID was in use, for sanity
480*4882a593Smuzhiyun  * We always use our idea of the saved address, not the address that
481*4882a593Smuzhiyun  * they pass in to us.
482*4882a593Smuzhiyun  */
qib_tid_free(struct qib_ctxtdata * rcd,unsigned subctxt,const struct qib_tid_info * ti)483*4882a593Smuzhiyun static int qib_tid_free(struct qib_ctxtdata *rcd, unsigned subctxt,
484*4882a593Smuzhiyun 			const struct qib_tid_info *ti)
485*4882a593Smuzhiyun {
486*4882a593Smuzhiyun 	int ret = 0;
487*4882a593Smuzhiyun 	u32 tid, ctxttid, cnt, limit, tidcnt;
488*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
489*4882a593Smuzhiyun 	u64 __iomem *tidbase;
490*4882a593Smuzhiyun 	unsigned long tidmap[8];
491*4882a593Smuzhiyun 
492*4882a593Smuzhiyun 	if (!dd->pageshadow) {
493*4882a593Smuzhiyun 		ret = -ENOMEM;
494*4882a593Smuzhiyun 		goto done;
495*4882a593Smuzhiyun 	}
496*4882a593Smuzhiyun 
497*4882a593Smuzhiyun 	if (copy_from_user(tidmap, u64_to_user_ptr(ti->tidmap),
498*4882a593Smuzhiyun 			   sizeof(tidmap))) {
499*4882a593Smuzhiyun 		ret = -EFAULT;
500*4882a593Smuzhiyun 		goto done;
501*4882a593Smuzhiyun 	}
502*4882a593Smuzhiyun 
503*4882a593Smuzhiyun 	ctxttid = rcd->ctxt * dd->rcvtidcnt;
504*4882a593Smuzhiyun 	if (!rcd->subctxt_cnt)
505*4882a593Smuzhiyun 		tidcnt = dd->rcvtidcnt;
506*4882a593Smuzhiyun 	else if (!subctxt) {
507*4882a593Smuzhiyun 		tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) +
508*4882a593Smuzhiyun 			 (dd->rcvtidcnt % rcd->subctxt_cnt);
509*4882a593Smuzhiyun 		ctxttid += dd->rcvtidcnt - tidcnt;
510*4882a593Smuzhiyun 	} else {
511*4882a593Smuzhiyun 		tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt;
512*4882a593Smuzhiyun 		ctxttid += tidcnt * (subctxt - 1);
513*4882a593Smuzhiyun 	}
514*4882a593Smuzhiyun 	tidbase = (u64 __iomem *) ((char __iomem *)(dd->kregbase) +
515*4882a593Smuzhiyun 				   dd->rcvtidbase +
516*4882a593Smuzhiyun 				   ctxttid * sizeof(*tidbase));
517*4882a593Smuzhiyun 
518*4882a593Smuzhiyun 	limit = sizeof(tidmap) * BITS_PER_BYTE;
519*4882a593Smuzhiyun 	if (limit > tidcnt)
520*4882a593Smuzhiyun 		/* just in case size changes in future */
521*4882a593Smuzhiyun 		limit = tidcnt;
522*4882a593Smuzhiyun 	tid = find_first_bit(tidmap, limit);
523*4882a593Smuzhiyun 	for (cnt = 0; tid < limit; tid++) {
524*4882a593Smuzhiyun 		/*
525*4882a593Smuzhiyun 		 * small optimization; if we detect a run of 3 or so without
526*4882a593Smuzhiyun 		 * any set, use find_first_bit again.  That's mainly to
527*4882a593Smuzhiyun 		 * accelerate the case where we wrapped, so we have some at
528*4882a593Smuzhiyun 		 * the beginning, and some at the end, and a big gap
529*4882a593Smuzhiyun 		 * in the middle.
530*4882a593Smuzhiyun 		 */
531*4882a593Smuzhiyun 		if (!test_bit(tid, tidmap))
532*4882a593Smuzhiyun 			continue;
533*4882a593Smuzhiyun 		cnt++;
534*4882a593Smuzhiyun 		if (dd->pageshadow[ctxttid + tid]) {
535*4882a593Smuzhiyun 			struct page *p;
536*4882a593Smuzhiyun 			dma_addr_t phys;
537*4882a593Smuzhiyun 
538*4882a593Smuzhiyun 			p = dd->pageshadow[ctxttid + tid];
539*4882a593Smuzhiyun 			dd->pageshadow[ctxttid + tid] = NULL;
540*4882a593Smuzhiyun 			phys = dd->physshadow[ctxttid + tid];
541*4882a593Smuzhiyun 			dd->physshadow[ctxttid + tid] = dd->tidinvalid;
542*4882a593Smuzhiyun 			/* PERFORMANCE: below should almost certainly be
543*4882a593Smuzhiyun 			 * cached
544*4882a593Smuzhiyun 			 */
545*4882a593Smuzhiyun 			dd->f_put_tid(dd, &tidbase[tid],
546*4882a593Smuzhiyun 				      RCVHQ_RCV_TYPE_EXPECTED, dd->tidinvalid);
547*4882a593Smuzhiyun 			pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
548*4882a593Smuzhiyun 				       PCI_DMA_FROMDEVICE);
549*4882a593Smuzhiyun 			qib_release_user_pages(&p, 1);
550*4882a593Smuzhiyun 		}
551*4882a593Smuzhiyun 	}
552*4882a593Smuzhiyun done:
553*4882a593Smuzhiyun 	return ret;
554*4882a593Smuzhiyun }
555*4882a593Smuzhiyun 
556*4882a593Smuzhiyun /**
557*4882a593Smuzhiyun  * qib_set_part_key - set a partition key
558*4882a593Smuzhiyun  * @rcd: the context
559*4882a593Smuzhiyun  * @key: the key
560*4882a593Smuzhiyun  *
561*4882a593Smuzhiyun  * We can have up to 4 active at a time (other than the default, which is
562*4882a593Smuzhiyun  * always allowed).  This is somewhat tricky, since multiple contexts may set
563*4882a593Smuzhiyun  * the same key, so we reference count them, and clean up at exit.  All 4
564*4882a593Smuzhiyun  * partition keys are packed into a single qlogic_ib register.  It's an
565*4882a593Smuzhiyun  * error for a process to set the same pkey multiple times.  We provide no
566*4882a593Smuzhiyun  * mechanism to de-allocate a pkey at this time, we may eventually need to
567*4882a593Smuzhiyun  * do that.  I've used the atomic operations, and no locking, and only make
568*4882a593Smuzhiyun  * a single pass through what's available.  This should be more than
569*4882a593Smuzhiyun  * adequate for some time. I'll think about spinlocks or the like if and as
570*4882a593Smuzhiyun  * it's necessary.
571*4882a593Smuzhiyun  */
qib_set_part_key(struct qib_ctxtdata * rcd,u16 key)572*4882a593Smuzhiyun static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
573*4882a593Smuzhiyun {
574*4882a593Smuzhiyun 	struct qib_pportdata *ppd = rcd->ppd;
575*4882a593Smuzhiyun 	int i, pidx = -1;
576*4882a593Smuzhiyun 	bool any = false;
577*4882a593Smuzhiyun 	u16 lkey = key & 0x7FFF;
578*4882a593Smuzhiyun 
579*4882a593Smuzhiyun 	if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF))
580*4882a593Smuzhiyun 		/* nothing to do; this key always valid */
581*4882a593Smuzhiyun 		return 0;
582*4882a593Smuzhiyun 
583*4882a593Smuzhiyun 	if (!lkey)
584*4882a593Smuzhiyun 		return -EINVAL;
585*4882a593Smuzhiyun 
586*4882a593Smuzhiyun 	/*
587*4882a593Smuzhiyun 	 * Set the full membership bit, because it has to be
588*4882a593Smuzhiyun 	 * set in the register or the packet, and it seems
589*4882a593Smuzhiyun 	 * cleaner to set in the register than to force all
590*4882a593Smuzhiyun 	 * callers to set it.
591*4882a593Smuzhiyun 	 */
592*4882a593Smuzhiyun 	key |= 0x8000;
593*4882a593Smuzhiyun 
594*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
595*4882a593Smuzhiyun 		if (!rcd->pkeys[i] && pidx == -1)
596*4882a593Smuzhiyun 			pidx = i;
597*4882a593Smuzhiyun 		if (rcd->pkeys[i] == key)
598*4882a593Smuzhiyun 			return -EEXIST;
599*4882a593Smuzhiyun 	}
600*4882a593Smuzhiyun 	if (pidx == -1)
601*4882a593Smuzhiyun 		return -EBUSY;
602*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
603*4882a593Smuzhiyun 		if (!ppd->pkeys[i]) {
604*4882a593Smuzhiyun 			any = true;
605*4882a593Smuzhiyun 			continue;
606*4882a593Smuzhiyun 		}
607*4882a593Smuzhiyun 		if (ppd->pkeys[i] == key) {
608*4882a593Smuzhiyun 			atomic_t *pkrefs = &ppd->pkeyrefs[i];
609*4882a593Smuzhiyun 
610*4882a593Smuzhiyun 			if (atomic_inc_return(pkrefs) > 1) {
611*4882a593Smuzhiyun 				rcd->pkeys[pidx] = key;
612*4882a593Smuzhiyun 				return 0;
613*4882a593Smuzhiyun 			}
614*4882a593Smuzhiyun 			/*
615*4882a593Smuzhiyun 			 * lost race, decrement count, catch below
616*4882a593Smuzhiyun 			 */
617*4882a593Smuzhiyun 			atomic_dec(pkrefs);
618*4882a593Smuzhiyun 			any = true;
619*4882a593Smuzhiyun 		}
620*4882a593Smuzhiyun 		if ((ppd->pkeys[i] & 0x7FFF) == lkey)
621*4882a593Smuzhiyun 			/*
622*4882a593Smuzhiyun 			 * It makes no sense to have both the limited and
623*4882a593Smuzhiyun 			 * full membership PKEY set at the same time since
624*4882a593Smuzhiyun 			 * the unlimited one will disable the limited one.
625*4882a593Smuzhiyun 			 */
626*4882a593Smuzhiyun 			return -EEXIST;
627*4882a593Smuzhiyun 	}
628*4882a593Smuzhiyun 	if (!any)
629*4882a593Smuzhiyun 		return -EBUSY;
630*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
631*4882a593Smuzhiyun 		if (!ppd->pkeys[i] &&
632*4882a593Smuzhiyun 		    atomic_inc_return(&ppd->pkeyrefs[i]) == 1) {
633*4882a593Smuzhiyun 			rcd->pkeys[pidx] = key;
634*4882a593Smuzhiyun 			ppd->pkeys[i] = key;
635*4882a593Smuzhiyun 			(void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
636*4882a593Smuzhiyun 			return 0;
637*4882a593Smuzhiyun 		}
638*4882a593Smuzhiyun 	}
639*4882a593Smuzhiyun 	return -EBUSY;
640*4882a593Smuzhiyun }
641*4882a593Smuzhiyun 
642*4882a593Smuzhiyun /**
643*4882a593Smuzhiyun  * qib_manage_rcvq - manage a context's receive queue
644*4882a593Smuzhiyun  * @rcd: the context
645*4882a593Smuzhiyun  * @subctxt: the subcontext
646*4882a593Smuzhiyun  * @start_stop: action to carry out
647*4882a593Smuzhiyun  *
648*4882a593Smuzhiyun  * start_stop == 0 disables receive on the context, for use in queue
649*4882a593Smuzhiyun  * overflow conditions.  start_stop==1 re-enables, to be used to
650*4882a593Smuzhiyun  * re-init the software copy of the head register
651*4882a593Smuzhiyun  */
qib_manage_rcvq(struct qib_ctxtdata * rcd,unsigned subctxt,int start_stop)652*4882a593Smuzhiyun static int qib_manage_rcvq(struct qib_ctxtdata *rcd, unsigned subctxt,
653*4882a593Smuzhiyun 			   int start_stop)
654*4882a593Smuzhiyun {
655*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
656*4882a593Smuzhiyun 	unsigned int rcvctrl_op;
657*4882a593Smuzhiyun 
658*4882a593Smuzhiyun 	if (subctxt)
659*4882a593Smuzhiyun 		goto bail;
660*4882a593Smuzhiyun 	/* atomically clear receive enable ctxt. */
661*4882a593Smuzhiyun 	if (start_stop) {
662*4882a593Smuzhiyun 		/*
663*4882a593Smuzhiyun 		 * On enable, force in-memory copy of the tail register to
664*4882a593Smuzhiyun 		 * 0, so that protocol code doesn't have to worry about
665*4882a593Smuzhiyun 		 * whether or not the chip has yet updated the in-memory
666*4882a593Smuzhiyun 		 * copy or not on return from the system call. The chip
667*4882a593Smuzhiyun 		 * always resets it's tail register back to 0 on a
668*4882a593Smuzhiyun 		 * transition from disabled to enabled.
669*4882a593Smuzhiyun 		 */
670*4882a593Smuzhiyun 		if (rcd->rcvhdrtail_kvaddr)
671*4882a593Smuzhiyun 			qib_clear_rcvhdrtail(rcd);
672*4882a593Smuzhiyun 		rcvctrl_op = QIB_RCVCTRL_CTXT_ENB;
673*4882a593Smuzhiyun 	} else
674*4882a593Smuzhiyun 		rcvctrl_op = QIB_RCVCTRL_CTXT_DIS;
675*4882a593Smuzhiyun 	dd->f_rcvctrl(rcd->ppd, rcvctrl_op, rcd->ctxt);
676*4882a593Smuzhiyun 	/* always; new head should be equal to new tail; see above */
677*4882a593Smuzhiyun bail:
678*4882a593Smuzhiyun 	return 0;
679*4882a593Smuzhiyun }
680*4882a593Smuzhiyun 
qib_clean_part_key(struct qib_ctxtdata * rcd,struct qib_devdata * dd)681*4882a593Smuzhiyun static void qib_clean_part_key(struct qib_ctxtdata *rcd,
682*4882a593Smuzhiyun 			       struct qib_devdata *dd)
683*4882a593Smuzhiyun {
684*4882a593Smuzhiyun 	int i, j, pchanged = 0;
685*4882a593Smuzhiyun 	struct qib_pportdata *ppd = rcd->ppd;
686*4882a593Smuzhiyun 
687*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
688*4882a593Smuzhiyun 		if (!rcd->pkeys[i])
689*4882a593Smuzhiyun 			continue;
690*4882a593Smuzhiyun 		for (j = 0; j < ARRAY_SIZE(ppd->pkeys); j++) {
691*4882a593Smuzhiyun 			/* check for match independent of the global bit */
692*4882a593Smuzhiyun 			if ((ppd->pkeys[j] & 0x7fff) !=
693*4882a593Smuzhiyun 			    (rcd->pkeys[i] & 0x7fff))
694*4882a593Smuzhiyun 				continue;
695*4882a593Smuzhiyun 			if (atomic_dec_and_test(&ppd->pkeyrefs[j])) {
696*4882a593Smuzhiyun 				ppd->pkeys[j] = 0;
697*4882a593Smuzhiyun 				pchanged++;
698*4882a593Smuzhiyun 			}
699*4882a593Smuzhiyun 			break;
700*4882a593Smuzhiyun 		}
701*4882a593Smuzhiyun 		rcd->pkeys[i] = 0;
702*4882a593Smuzhiyun 	}
703*4882a593Smuzhiyun 	if (pchanged)
704*4882a593Smuzhiyun 		(void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
705*4882a593Smuzhiyun }
706*4882a593Smuzhiyun 
707*4882a593Smuzhiyun /* common code for the mappings on dma_alloc_coherent mem */
qib_mmap_mem(struct vm_area_struct * vma,struct qib_ctxtdata * rcd,unsigned len,void * kvaddr,u32 write_ok,char * what)708*4882a593Smuzhiyun static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd,
709*4882a593Smuzhiyun 			unsigned len, void *kvaddr, u32 write_ok, char *what)
710*4882a593Smuzhiyun {
711*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
712*4882a593Smuzhiyun 	unsigned long pfn;
713*4882a593Smuzhiyun 	int ret;
714*4882a593Smuzhiyun 
715*4882a593Smuzhiyun 	if ((vma->vm_end - vma->vm_start) > len) {
716*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
717*4882a593Smuzhiyun 			 "FAIL on %s: len %lx > %x\n", what,
718*4882a593Smuzhiyun 			 vma->vm_end - vma->vm_start, len);
719*4882a593Smuzhiyun 		ret = -EFAULT;
720*4882a593Smuzhiyun 		goto bail;
721*4882a593Smuzhiyun 	}
722*4882a593Smuzhiyun 
723*4882a593Smuzhiyun 	/*
724*4882a593Smuzhiyun 	 * shared context user code requires rcvhdrq mapped r/w, others
725*4882a593Smuzhiyun 	 * only allowed readonly mapping.
726*4882a593Smuzhiyun 	 */
727*4882a593Smuzhiyun 	if (!write_ok) {
728*4882a593Smuzhiyun 		if (vma->vm_flags & VM_WRITE) {
729*4882a593Smuzhiyun 			qib_devinfo(dd->pcidev,
730*4882a593Smuzhiyun 				 "%s must be mapped readonly\n", what);
731*4882a593Smuzhiyun 			ret = -EPERM;
732*4882a593Smuzhiyun 			goto bail;
733*4882a593Smuzhiyun 		}
734*4882a593Smuzhiyun 
735*4882a593Smuzhiyun 		/* don't allow them to later change with mprotect */
736*4882a593Smuzhiyun 		vma->vm_flags &= ~VM_MAYWRITE;
737*4882a593Smuzhiyun 	}
738*4882a593Smuzhiyun 
739*4882a593Smuzhiyun 	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
740*4882a593Smuzhiyun 	ret = remap_pfn_range(vma, vma->vm_start, pfn,
741*4882a593Smuzhiyun 			      len, vma->vm_page_prot);
742*4882a593Smuzhiyun 	if (ret)
743*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
744*4882a593Smuzhiyun 			"%s ctxt%u mmap of %lx, %x bytes failed: %d\n",
745*4882a593Smuzhiyun 			what, rcd->ctxt, pfn, len, ret);
746*4882a593Smuzhiyun bail:
747*4882a593Smuzhiyun 	return ret;
748*4882a593Smuzhiyun }
749*4882a593Smuzhiyun 
mmap_ureg(struct vm_area_struct * vma,struct qib_devdata * dd,u64 ureg)750*4882a593Smuzhiyun static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd,
751*4882a593Smuzhiyun 		     u64 ureg)
752*4882a593Smuzhiyun {
753*4882a593Smuzhiyun 	unsigned long phys;
754*4882a593Smuzhiyun 	unsigned long sz;
755*4882a593Smuzhiyun 	int ret;
756*4882a593Smuzhiyun 
757*4882a593Smuzhiyun 	/*
758*4882a593Smuzhiyun 	 * This is real hardware, so use io_remap.  This is the mechanism
759*4882a593Smuzhiyun 	 * for the user process to update the head registers for their ctxt
760*4882a593Smuzhiyun 	 * in the chip.
761*4882a593Smuzhiyun 	 */
762*4882a593Smuzhiyun 	sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE;
763*4882a593Smuzhiyun 	if ((vma->vm_end - vma->vm_start) > sz) {
764*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
765*4882a593Smuzhiyun 			"FAIL mmap userreg: reqlen %lx > PAGE\n",
766*4882a593Smuzhiyun 			vma->vm_end - vma->vm_start);
767*4882a593Smuzhiyun 		ret = -EFAULT;
768*4882a593Smuzhiyun 	} else {
769*4882a593Smuzhiyun 		phys = dd->physaddr + ureg;
770*4882a593Smuzhiyun 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
771*4882a593Smuzhiyun 
772*4882a593Smuzhiyun 		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
773*4882a593Smuzhiyun 		ret = io_remap_pfn_range(vma, vma->vm_start,
774*4882a593Smuzhiyun 					 phys >> PAGE_SHIFT,
775*4882a593Smuzhiyun 					 vma->vm_end - vma->vm_start,
776*4882a593Smuzhiyun 					 vma->vm_page_prot);
777*4882a593Smuzhiyun 	}
778*4882a593Smuzhiyun 	return ret;
779*4882a593Smuzhiyun }
780*4882a593Smuzhiyun 
mmap_piobufs(struct vm_area_struct * vma,struct qib_devdata * dd,struct qib_ctxtdata * rcd,unsigned piobufs,unsigned piocnt)781*4882a593Smuzhiyun static int mmap_piobufs(struct vm_area_struct *vma,
782*4882a593Smuzhiyun 			struct qib_devdata *dd,
783*4882a593Smuzhiyun 			struct qib_ctxtdata *rcd,
784*4882a593Smuzhiyun 			unsigned piobufs, unsigned piocnt)
785*4882a593Smuzhiyun {
786*4882a593Smuzhiyun 	unsigned long phys;
787*4882a593Smuzhiyun 	int ret;
788*4882a593Smuzhiyun 
789*4882a593Smuzhiyun 	/*
790*4882a593Smuzhiyun 	 * When we map the PIO buffers in the chip, we want to map them as
791*4882a593Smuzhiyun 	 * writeonly, no read possible; unfortunately, x86 doesn't allow
792*4882a593Smuzhiyun 	 * for this in hardware, but we still prevent users from asking
793*4882a593Smuzhiyun 	 * for it.
794*4882a593Smuzhiyun 	 */
795*4882a593Smuzhiyun 	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) {
796*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
797*4882a593Smuzhiyun 			"FAIL mmap piobufs: reqlen %lx > PAGE\n",
798*4882a593Smuzhiyun 			 vma->vm_end - vma->vm_start);
799*4882a593Smuzhiyun 		ret = -EINVAL;
800*4882a593Smuzhiyun 		goto bail;
801*4882a593Smuzhiyun 	}
802*4882a593Smuzhiyun 
803*4882a593Smuzhiyun 	phys = dd->physaddr + piobufs;
804*4882a593Smuzhiyun 
805*4882a593Smuzhiyun #if defined(__powerpc__)
806*4882a593Smuzhiyun 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
807*4882a593Smuzhiyun #endif
808*4882a593Smuzhiyun 
809*4882a593Smuzhiyun 	/*
810*4882a593Smuzhiyun 	 * don't allow them to later change to readable with mprotect (for when
811*4882a593Smuzhiyun 	 * not initially mapped readable, as is normally the case)
812*4882a593Smuzhiyun 	 */
813*4882a593Smuzhiyun 	vma->vm_flags &= ~VM_MAYREAD;
814*4882a593Smuzhiyun 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
815*4882a593Smuzhiyun 
816*4882a593Smuzhiyun 	/* We used PAT if wc_cookie == 0 */
817*4882a593Smuzhiyun 	if (!dd->wc_cookie)
818*4882a593Smuzhiyun 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
819*4882a593Smuzhiyun 
820*4882a593Smuzhiyun 	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
821*4882a593Smuzhiyun 				 vma->vm_end - vma->vm_start,
822*4882a593Smuzhiyun 				 vma->vm_page_prot);
823*4882a593Smuzhiyun bail:
824*4882a593Smuzhiyun 	return ret;
825*4882a593Smuzhiyun }
826*4882a593Smuzhiyun 
mmap_rcvegrbufs(struct vm_area_struct * vma,struct qib_ctxtdata * rcd)827*4882a593Smuzhiyun static int mmap_rcvegrbufs(struct vm_area_struct *vma,
828*4882a593Smuzhiyun 			   struct qib_ctxtdata *rcd)
829*4882a593Smuzhiyun {
830*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
831*4882a593Smuzhiyun 	unsigned long start, size;
832*4882a593Smuzhiyun 	size_t total_size, i;
833*4882a593Smuzhiyun 	unsigned long pfn;
834*4882a593Smuzhiyun 	int ret;
835*4882a593Smuzhiyun 
836*4882a593Smuzhiyun 	size = rcd->rcvegrbuf_size;
837*4882a593Smuzhiyun 	total_size = rcd->rcvegrbuf_chunks * size;
838*4882a593Smuzhiyun 	if ((vma->vm_end - vma->vm_start) > total_size) {
839*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
840*4882a593Smuzhiyun 			"FAIL on egr bufs: reqlen %lx > actual %lx\n",
841*4882a593Smuzhiyun 			 vma->vm_end - vma->vm_start,
842*4882a593Smuzhiyun 			 (unsigned long) total_size);
843*4882a593Smuzhiyun 		ret = -EINVAL;
844*4882a593Smuzhiyun 		goto bail;
845*4882a593Smuzhiyun 	}
846*4882a593Smuzhiyun 
847*4882a593Smuzhiyun 	if (vma->vm_flags & VM_WRITE) {
848*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
849*4882a593Smuzhiyun 			"Can't map eager buffers as writable (flags=%lx)\n",
850*4882a593Smuzhiyun 			vma->vm_flags);
851*4882a593Smuzhiyun 		ret = -EPERM;
852*4882a593Smuzhiyun 		goto bail;
853*4882a593Smuzhiyun 	}
854*4882a593Smuzhiyun 	/* don't allow them to later change to writeable with mprotect */
855*4882a593Smuzhiyun 	vma->vm_flags &= ~VM_MAYWRITE;
856*4882a593Smuzhiyun 
857*4882a593Smuzhiyun 	start = vma->vm_start;
858*4882a593Smuzhiyun 
859*4882a593Smuzhiyun 	for (i = 0; i < rcd->rcvegrbuf_chunks; i++, start += size) {
860*4882a593Smuzhiyun 		pfn = virt_to_phys(rcd->rcvegrbuf[i]) >> PAGE_SHIFT;
861*4882a593Smuzhiyun 		ret = remap_pfn_range(vma, start, pfn, size,
862*4882a593Smuzhiyun 				      vma->vm_page_prot);
863*4882a593Smuzhiyun 		if (ret < 0)
864*4882a593Smuzhiyun 			goto bail;
865*4882a593Smuzhiyun 	}
866*4882a593Smuzhiyun 	ret = 0;
867*4882a593Smuzhiyun 
868*4882a593Smuzhiyun bail:
869*4882a593Smuzhiyun 	return ret;
870*4882a593Smuzhiyun }
871*4882a593Smuzhiyun 
872*4882a593Smuzhiyun /*
873*4882a593Smuzhiyun  * qib_file_vma_fault - handle a VMA page fault.
874*4882a593Smuzhiyun  */
qib_file_vma_fault(struct vm_fault * vmf)875*4882a593Smuzhiyun static vm_fault_t qib_file_vma_fault(struct vm_fault *vmf)
876*4882a593Smuzhiyun {
877*4882a593Smuzhiyun 	struct page *page;
878*4882a593Smuzhiyun 
879*4882a593Smuzhiyun 	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
880*4882a593Smuzhiyun 	if (!page)
881*4882a593Smuzhiyun 		return VM_FAULT_SIGBUS;
882*4882a593Smuzhiyun 
883*4882a593Smuzhiyun 	get_page(page);
884*4882a593Smuzhiyun 	vmf->page = page;
885*4882a593Smuzhiyun 
886*4882a593Smuzhiyun 	return 0;
887*4882a593Smuzhiyun }
888*4882a593Smuzhiyun 
889*4882a593Smuzhiyun static const struct vm_operations_struct qib_file_vm_ops = {
890*4882a593Smuzhiyun 	.fault = qib_file_vma_fault,
891*4882a593Smuzhiyun };
892*4882a593Smuzhiyun 
mmap_kvaddr(struct vm_area_struct * vma,u64 pgaddr,struct qib_ctxtdata * rcd,unsigned subctxt)893*4882a593Smuzhiyun static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
894*4882a593Smuzhiyun 		       struct qib_ctxtdata *rcd, unsigned subctxt)
895*4882a593Smuzhiyun {
896*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
897*4882a593Smuzhiyun 	unsigned subctxt_cnt;
898*4882a593Smuzhiyun 	unsigned long len;
899*4882a593Smuzhiyun 	void *addr;
900*4882a593Smuzhiyun 	size_t size;
901*4882a593Smuzhiyun 	int ret = 0;
902*4882a593Smuzhiyun 
903*4882a593Smuzhiyun 	subctxt_cnt = rcd->subctxt_cnt;
904*4882a593Smuzhiyun 	size = rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size;
905*4882a593Smuzhiyun 
906*4882a593Smuzhiyun 	/*
907*4882a593Smuzhiyun 	 * Each process has all the subctxt uregbase, rcvhdrq, and
908*4882a593Smuzhiyun 	 * rcvegrbufs mmapped - as an array for all the processes,
909*4882a593Smuzhiyun 	 * and also separately for this process.
910*4882a593Smuzhiyun 	 */
911*4882a593Smuzhiyun 	if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase)) {
912*4882a593Smuzhiyun 		addr = rcd->subctxt_uregbase;
913*4882a593Smuzhiyun 		size = PAGE_SIZE * subctxt_cnt;
914*4882a593Smuzhiyun 	} else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base)) {
915*4882a593Smuzhiyun 		addr = rcd->subctxt_rcvhdr_base;
916*4882a593Smuzhiyun 		size = rcd->rcvhdrq_size * subctxt_cnt;
917*4882a593Smuzhiyun 	} else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf)) {
918*4882a593Smuzhiyun 		addr = rcd->subctxt_rcvegrbuf;
919*4882a593Smuzhiyun 		size *= subctxt_cnt;
920*4882a593Smuzhiyun 	} else if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase +
921*4882a593Smuzhiyun 					PAGE_SIZE * subctxt)) {
922*4882a593Smuzhiyun 		addr = rcd->subctxt_uregbase + PAGE_SIZE * subctxt;
923*4882a593Smuzhiyun 		size = PAGE_SIZE;
924*4882a593Smuzhiyun 	} else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base +
925*4882a593Smuzhiyun 					rcd->rcvhdrq_size * subctxt)) {
926*4882a593Smuzhiyun 		addr = rcd->subctxt_rcvhdr_base +
927*4882a593Smuzhiyun 			rcd->rcvhdrq_size * subctxt;
928*4882a593Smuzhiyun 		size = rcd->rcvhdrq_size;
929*4882a593Smuzhiyun 	} else if (pgaddr == cvt_kvaddr(&rcd->user_event_mask[subctxt])) {
930*4882a593Smuzhiyun 		addr = rcd->user_event_mask;
931*4882a593Smuzhiyun 		size = PAGE_SIZE;
932*4882a593Smuzhiyun 	} else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf +
933*4882a593Smuzhiyun 					size * subctxt)) {
934*4882a593Smuzhiyun 		addr = rcd->subctxt_rcvegrbuf + size * subctxt;
935*4882a593Smuzhiyun 		/* rcvegrbufs are read-only on the slave */
936*4882a593Smuzhiyun 		if (vma->vm_flags & VM_WRITE) {
937*4882a593Smuzhiyun 			qib_devinfo(dd->pcidev,
938*4882a593Smuzhiyun 				 "Can't map eager buffers as writable (flags=%lx)\n",
939*4882a593Smuzhiyun 				 vma->vm_flags);
940*4882a593Smuzhiyun 			ret = -EPERM;
941*4882a593Smuzhiyun 			goto bail;
942*4882a593Smuzhiyun 		}
943*4882a593Smuzhiyun 		/*
944*4882a593Smuzhiyun 		 * Don't allow permission to later change to writeable
945*4882a593Smuzhiyun 		 * with mprotect.
946*4882a593Smuzhiyun 		 */
947*4882a593Smuzhiyun 		vma->vm_flags &= ~VM_MAYWRITE;
948*4882a593Smuzhiyun 	} else
949*4882a593Smuzhiyun 		goto bail;
950*4882a593Smuzhiyun 	len = vma->vm_end - vma->vm_start;
951*4882a593Smuzhiyun 	if (len > size) {
952*4882a593Smuzhiyun 		ret = -EINVAL;
953*4882a593Smuzhiyun 		goto bail;
954*4882a593Smuzhiyun 	}
955*4882a593Smuzhiyun 
956*4882a593Smuzhiyun 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
957*4882a593Smuzhiyun 	vma->vm_ops = &qib_file_vm_ops;
958*4882a593Smuzhiyun 	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
959*4882a593Smuzhiyun 	ret = 1;
960*4882a593Smuzhiyun 
961*4882a593Smuzhiyun bail:
962*4882a593Smuzhiyun 	return ret;
963*4882a593Smuzhiyun }
964*4882a593Smuzhiyun 
965*4882a593Smuzhiyun /**
966*4882a593Smuzhiyun  * qib_mmapf - mmap various structures into user space
967*4882a593Smuzhiyun  * @fp: the file pointer
968*4882a593Smuzhiyun  * @vma: the VM area
969*4882a593Smuzhiyun  *
970*4882a593Smuzhiyun  * We use this to have a shared buffer between the kernel and the user code
971*4882a593Smuzhiyun  * for the rcvhdr queue, egr buffers, and the per-context user regs and pio
972*4882a593Smuzhiyun  * buffers in the chip.  We have the open and close entries so we can bump
973*4882a593Smuzhiyun  * the ref count and keep the driver from being unloaded while still mapped.
974*4882a593Smuzhiyun  */
qib_mmapf(struct file * fp,struct vm_area_struct * vma)975*4882a593Smuzhiyun static int qib_mmapf(struct file *fp, struct vm_area_struct *vma)
976*4882a593Smuzhiyun {
977*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd;
978*4882a593Smuzhiyun 	struct qib_devdata *dd;
979*4882a593Smuzhiyun 	u64 pgaddr, ureg;
980*4882a593Smuzhiyun 	unsigned piobufs, piocnt;
981*4882a593Smuzhiyun 	int ret, match = 1;
982*4882a593Smuzhiyun 
983*4882a593Smuzhiyun 	rcd = ctxt_fp(fp);
984*4882a593Smuzhiyun 	if (!rcd || !(vma->vm_flags & VM_SHARED)) {
985*4882a593Smuzhiyun 		ret = -EINVAL;
986*4882a593Smuzhiyun 		goto bail;
987*4882a593Smuzhiyun 	}
988*4882a593Smuzhiyun 	dd = rcd->dd;
989*4882a593Smuzhiyun 
990*4882a593Smuzhiyun 	/*
991*4882a593Smuzhiyun 	 * This is the qib_do_user_init() code, mapping the shared buffers
992*4882a593Smuzhiyun 	 * and per-context user registers into the user process. The address
993*4882a593Smuzhiyun 	 * referred to by vm_pgoff is the file offset passed via mmap().
994*4882a593Smuzhiyun 	 * For shared contexts, this is the kernel vmalloc() address of the
995*4882a593Smuzhiyun 	 * pages to share with the master.
996*4882a593Smuzhiyun 	 * For non-shared or master ctxts, this is a physical address.
997*4882a593Smuzhiyun 	 * We only do one mmap for each space mapped.
998*4882a593Smuzhiyun 	 */
999*4882a593Smuzhiyun 	pgaddr = vma->vm_pgoff << PAGE_SHIFT;
1000*4882a593Smuzhiyun 
1001*4882a593Smuzhiyun 	/*
1002*4882a593Smuzhiyun 	 * Check for 0 in case one of the allocations failed, but user
1003*4882a593Smuzhiyun 	 * called mmap anyway.
1004*4882a593Smuzhiyun 	 */
1005*4882a593Smuzhiyun 	if (!pgaddr)  {
1006*4882a593Smuzhiyun 		ret = -EINVAL;
1007*4882a593Smuzhiyun 		goto bail;
1008*4882a593Smuzhiyun 	}
1009*4882a593Smuzhiyun 
1010*4882a593Smuzhiyun 	/*
1011*4882a593Smuzhiyun 	 * Physical addresses must fit in 40 bits for our hardware.
1012*4882a593Smuzhiyun 	 * Check for kernel virtual addresses first, anything else must
1013*4882a593Smuzhiyun 	 * match a HW or memory address.
1014*4882a593Smuzhiyun 	 */
1015*4882a593Smuzhiyun 	ret = mmap_kvaddr(vma, pgaddr, rcd, subctxt_fp(fp));
1016*4882a593Smuzhiyun 	if (ret) {
1017*4882a593Smuzhiyun 		if (ret > 0)
1018*4882a593Smuzhiyun 			ret = 0;
1019*4882a593Smuzhiyun 		goto bail;
1020*4882a593Smuzhiyun 	}
1021*4882a593Smuzhiyun 
1022*4882a593Smuzhiyun 	ureg = dd->uregbase + dd->ureg_align * rcd->ctxt;
1023*4882a593Smuzhiyun 	if (!rcd->subctxt_cnt) {
1024*4882a593Smuzhiyun 		/* ctxt is not shared */
1025*4882a593Smuzhiyun 		piocnt = rcd->piocnt;
1026*4882a593Smuzhiyun 		piobufs = rcd->piobufs;
1027*4882a593Smuzhiyun 	} else if (!subctxt_fp(fp)) {
1028*4882a593Smuzhiyun 		/* caller is the master */
1029*4882a593Smuzhiyun 		piocnt = (rcd->piocnt / rcd->subctxt_cnt) +
1030*4882a593Smuzhiyun 			 (rcd->piocnt % rcd->subctxt_cnt);
1031*4882a593Smuzhiyun 		piobufs = rcd->piobufs +
1032*4882a593Smuzhiyun 			dd->palign * (rcd->piocnt - piocnt);
1033*4882a593Smuzhiyun 	} else {
1034*4882a593Smuzhiyun 		unsigned slave = subctxt_fp(fp) - 1;
1035*4882a593Smuzhiyun 
1036*4882a593Smuzhiyun 		/* caller is a slave */
1037*4882a593Smuzhiyun 		piocnt = rcd->piocnt / rcd->subctxt_cnt;
1038*4882a593Smuzhiyun 		piobufs = rcd->piobufs + dd->palign * piocnt * slave;
1039*4882a593Smuzhiyun 	}
1040*4882a593Smuzhiyun 
1041*4882a593Smuzhiyun 	if (pgaddr == ureg)
1042*4882a593Smuzhiyun 		ret = mmap_ureg(vma, dd, ureg);
1043*4882a593Smuzhiyun 	else if (pgaddr == piobufs)
1044*4882a593Smuzhiyun 		ret = mmap_piobufs(vma, dd, rcd, piobufs, piocnt);
1045*4882a593Smuzhiyun 	else if (pgaddr == dd->pioavailregs_phys)
1046*4882a593Smuzhiyun 		/* in-memory copy of pioavail registers */
1047*4882a593Smuzhiyun 		ret = qib_mmap_mem(vma, rcd, PAGE_SIZE,
1048*4882a593Smuzhiyun 				   (void *) dd->pioavailregs_dma, 0,
1049*4882a593Smuzhiyun 				   "pioavail registers");
1050*4882a593Smuzhiyun 	else if (pgaddr == rcd->rcvegr_phys)
1051*4882a593Smuzhiyun 		ret = mmap_rcvegrbufs(vma, rcd);
1052*4882a593Smuzhiyun 	else if (pgaddr == (u64) rcd->rcvhdrq_phys)
1053*4882a593Smuzhiyun 		/*
1054*4882a593Smuzhiyun 		 * The rcvhdrq itself; multiple pages, contiguous
1055*4882a593Smuzhiyun 		 * from an i/o perspective.  Shared contexts need
1056*4882a593Smuzhiyun 		 * to map r/w, so we allow writing.
1057*4882a593Smuzhiyun 		 */
1058*4882a593Smuzhiyun 		ret = qib_mmap_mem(vma, rcd, rcd->rcvhdrq_size,
1059*4882a593Smuzhiyun 				   rcd->rcvhdrq, 1, "rcvhdrq");
1060*4882a593Smuzhiyun 	else if (pgaddr == (u64) rcd->rcvhdrqtailaddr_phys)
1061*4882a593Smuzhiyun 		/* in-memory copy of rcvhdrq tail register */
1062*4882a593Smuzhiyun 		ret = qib_mmap_mem(vma, rcd, PAGE_SIZE,
1063*4882a593Smuzhiyun 				   rcd->rcvhdrtail_kvaddr, 0,
1064*4882a593Smuzhiyun 				   "rcvhdrq tail");
1065*4882a593Smuzhiyun 	else
1066*4882a593Smuzhiyun 		match = 0;
1067*4882a593Smuzhiyun 	if (!match)
1068*4882a593Smuzhiyun 		ret = -EINVAL;
1069*4882a593Smuzhiyun 
1070*4882a593Smuzhiyun 	vma->vm_private_data = NULL;
1071*4882a593Smuzhiyun 
1072*4882a593Smuzhiyun 	if (ret < 0)
1073*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
1074*4882a593Smuzhiyun 			 "mmap Failure %d: off %llx len %lx\n",
1075*4882a593Smuzhiyun 			 -ret, (unsigned long long)pgaddr,
1076*4882a593Smuzhiyun 			 vma->vm_end - vma->vm_start);
1077*4882a593Smuzhiyun bail:
1078*4882a593Smuzhiyun 	return ret;
1079*4882a593Smuzhiyun }
1080*4882a593Smuzhiyun 
qib_poll_urgent(struct qib_ctxtdata * rcd,struct file * fp,struct poll_table_struct * pt)1081*4882a593Smuzhiyun static __poll_t qib_poll_urgent(struct qib_ctxtdata *rcd,
1082*4882a593Smuzhiyun 				    struct file *fp,
1083*4882a593Smuzhiyun 				    struct poll_table_struct *pt)
1084*4882a593Smuzhiyun {
1085*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
1086*4882a593Smuzhiyun 	__poll_t pollflag;
1087*4882a593Smuzhiyun 
1088*4882a593Smuzhiyun 	poll_wait(fp, &rcd->wait, pt);
1089*4882a593Smuzhiyun 
1090*4882a593Smuzhiyun 	spin_lock_irq(&dd->uctxt_lock);
1091*4882a593Smuzhiyun 	if (rcd->urgent != rcd->urgent_poll) {
1092*4882a593Smuzhiyun 		pollflag = EPOLLIN | EPOLLRDNORM;
1093*4882a593Smuzhiyun 		rcd->urgent_poll = rcd->urgent;
1094*4882a593Smuzhiyun 	} else {
1095*4882a593Smuzhiyun 		pollflag = 0;
1096*4882a593Smuzhiyun 		set_bit(QIB_CTXT_WAITING_URG, &rcd->flag);
1097*4882a593Smuzhiyun 	}
1098*4882a593Smuzhiyun 	spin_unlock_irq(&dd->uctxt_lock);
1099*4882a593Smuzhiyun 
1100*4882a593Smuzhiyun 	return pollflag;
1101*4882a593Smuzhiyun }
1102*4882a593Smuzhiyun 
qib_poll_next(struct qib_ctxtdata * rcd,struct file * fp,struct poll_table_struct * pt)1103*4882a593Smuzhiyun static __poll_t qib_poll_next(struct qib_ctxtdata *rcd,
1104*4882a593Smuzhiyun 				  struct file *fp,
1105*4882a593Smuzhiyun 				  struct poll_table_struct *pt)
1106*4882a593Smuzhiyun {
1107*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
1108*4882a593Smuzhiyun 	__poll_t pollflag;
1109*4882a593Smuzhiyun 
1110*4882a593Smuzhiyun 	poll_wait(fp, &rcd->wait, pt);
1111*4882a593Smuzhiyun 
1112*4882a593Smuzhiyun 	spin_lock_irq(&dd->uctxt_lock);
1113*4882a593Smuzhiyun 	if (dd->f_hdrqempty(rcd)) {
1114*4882a593Smuzhiyun 		set_bit(QIB_CTXT_WAITING_RCV, &rcd->flag);
1115*4882a593Smuzhiyun 		dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_ENB, rcd->ctxt);
1116*4882a593Smuzhiyun 		pollflag = 0;
1117*4882a593Smuzhiyun 	} else
1118*4882a593Smuzhiyun 		pollflag = EPOLLIN | EPOLLRDNORM;
1119*4882a593Smuzhiyun 	spin_unlock_irq(&dd->uctxt_lock);
1120*4882a593Smuzhiyun 
1121*4882a593Smuzhiyun 	return pollflag;
1122*4882a593Smuzhiyun }
1123*4882a593Smuzhiyun 
qib_poll(struct file * fp,struct poll_table_struct * pt)1124*4882a593Smuzhiyun static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
1125*4882a593Smuzhiyun {
1126*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd;
1127*4882a593Smuzhiyun 	__poll_t pollflag;
1128*4882a593Smuzhiyun 
1129*4882a593Smuzhiyun 	rcd = ctxt_fp(fp);
1130*4882a593Smuzhiyun 	if (!rcd)
1131*4882a593Smuzhiyun 		pollflag = EPOLLERR;
1132*4882a593Smuzhiyun 	else if (rcd->poll_type == QIB_POLL_TYPE_URGENT)
1133*4882a593Smuzhiyun 		pollflag = qib_poll_urgent(rcd, fp, pt);
1134*4882a593Smuzhiyun 	else  if (rcd->poll_type == QIB_POLL_TYPE_ANYRCV)
1135*4882a593Smuzhiyun 		pollflag = qib_poll_next(rcd, fp, pt);
1136*4882a593Smuzhiyun 	else /* invalid */
1137*4882a593Smuzhiyun 		pollflag = EPOLLERR;
1138*4882a593Smuzhiyun 
1139*4882a593Smuzhiyun 	return pollflag;
1140*4882a593Smuzhiyun }
1141*4882a593Smuzhiyun 
assign_ctxt_affinity(struct file * fp,struct qib_devdata * dd)1142*4882a593Smuzhiyun static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
1143*4882a593Smuzhiyun {
1144*4882a593Smuzhiyun 	struct qib_filedata *fd = fp->private_data;
1145*4882a593Smuzhiyun 	const unsigned int weight = current->nr_cpus_allowed;
1146*4882a593Smuzhiyun 	const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
1147*4882a593Smuzhiyun 	int local_cpu;
1148*4882a593Smuzhiyun 
1149*4882a593Smuzhiyun 	/*
1150*4882a593Smuzhiyun 	 * If process has NOT already set it's affinity, select and
1151*4882a593Smuzhiyun 	 * reserve a processor for it on the local NUMA node.
1152*4882a593Smuzhiyun 	 */
1153*4882a593Smuzhiyun 	if ((weight >= qib_cpulist_count) &&
1154*4882a593Smuzhiyun 		(cpumask_weight(local_mask) <= qib_cpulist_count)) {
1155*4882a593Smuzhiyun 		for_each_cpu(local_cpu, local_mask)
1156*4882a593Smuzhiyun 			if (!test_and_set_bit(local_cpu, qib_cpulist)) {
1157*4882a593Smuzhiyun 				fd->rec_cpu_num = local_cpu;
1158*4882a593Smuzhiyun 				return;
1159*4882a593Smuzhiyun 			}
1160*4882a593Smuzhiyun 	}
1161*4882a593Smuzhiyun 
1162*4882a593Smuzhiyun 	/*
1163*4882a593Smuzhiyun 	 * If process has NOT already set it's affinity, select and
1164*4882a593Smuzhiyun 	 * reserve a processor for it, as a rendevous for all
1165*4882a593Smuzhiyun 	 * users of the driver.  If they don't actually later
1166*4882a593Smuzhiyun 	 * set affinity to this cpu, or set it to some other cpu,
1167*4882a593Smuzhiyun 	 * it just means that sooner or later we don't recommend
1168*4882a593Smuzhiyun 	 * a cpu, and let the scheduler do it's best.
1169*4882a593Smuzhiyun 	 */
1170*4882a593Smuzhiyun 	if (weight >= qib_cpulist_count) {
1171*4882a593Smuzhiyun 		int cpu;
1172*4882a593Smuzhiyun 
1173*4882a593Smuzhiyun 		cpu = find_first_zero_bit(qib_cpulist,
1174*4882a593Smuzhiyun 					  qib_cpulist_count);
1175*4882a593Smuzhiyun 		if (cpu == qib_cpulist_count)
1176*4882a593Smuzhiyun 			qib_dev_err(dd,
1177*4882a593Smuzhiyun 			"no cpus avail for affinity PID %u\n",
1178*4882a593Smuzhiyun 			current->pid);
1179*4882a593Smuzhiyun 		else {
1180*4882a593Smuzhiyun 			__set_bit(cpu, qib_cpulist);
1181*4882a593Smuzhiyun 			fd->rec_cpu_num = cpu;
1182*4882a593Smuzhiyun 		}
1183*4882a593Smuzhiyun 	}
1184*4882a593Smuzhiyun }
1185*4882a593Smuzhiyun 
1186*4882a593Smuzhiyun /*
1187*4882a593Smuzhiyun  * Check that userland and driver are compatible for subcontexts.
1188*4882a593Smuzhiyun  */
qib_compatible_subctxts(int user_swmajor,int user_swminor)1189*4882a593Smuzhiyun static int qib_compatible_subctxts(int user_swmajor, int user_swminor)
1190*4882a593Smuzhiyun {
1191*4882a593Smuzhiyun 	/* this code is written long-hand for clarity */
1192*4882a593Smuzhiyun 	if (QIB_USER_SWMAJOR != user_swmajor) {
1193*4882a593Smuzhiyun 		/* no promise of compatibility if major mismatch */
1194*4882a593Smuzhiyun 		return 0;
1195*4882a593Smuzhiyun 	}
1196*4882a593Smuzhiyun 	if (QIB_USER_SWMAJOR == 1) {
1197*4882a593Smuzhiyun 		switch (QIB_USER_SWMINOR) {
1198*4882a593Smuzhiyun 		case 0:
1199*4882a593Smuzhiyun 		case 1:
1200*4882a593Smuzhiyun 		case 2:
1201*4882a593Smuzhiyun 			/* no subctxt implementation so cannot be compatible */
1202*4882a593Smuzhiyun 			return 0;
1203*4882a593Smuzhiyun 		case 3:
1204*4882a593Smuzhiyun 			/* 3 is only compatible with itself */
1205*4882a593Smuzhiyun 			return user_swminor == 3;
1206*4882a593Smuzhiyun 		default:
1207*4882a593Smuzhiyun 			/* >= 4 are compatible (or are expected to be) */
1208*4882a593Smuzhiyun 			return user_swminor <= QIB_USER_SWMINOR;
1209*4882a593Smuzhiyun 		}
1210*4882a593Smuzhiyun 	}
1211*4882a593Smuzhiyun 	/* make no promises yet for future major versions */
1212*4882a593Smuzhiyun 	return 0;
1213*4882a593Smuzhiyun }
1214*4882a593Smuzhiyun 
init_subctxts(struct qib_devdata * dd,struct qib_ctxtdata * rcd,const struct qib_user_info * uinfo)1215*4882a593Smuzhiyun static int init_subctxts(struct qib_devdata *dd,
1216*4882a593Smuzhiyun 			 struct qib_ctxtdata *rcd,
1217*4882a593Smuzhiyun 			 const struct qib_user_info *uinfo)
1218*4882a593Smuzhiyun {
1219*4882a593Smuzhiyun 	int ret = 0;
1220*4882a593Smuzhiyun 	unsigned num_subctxts;
1221*4882a593Smuzhiyun 	size_t size;
1222*4882a593Smuzhiyun 
1223*4882a593Smuzhiyun 	/*
1224*4882a593Smuzhiyun 	 * If the user is requesting zero subctxts,
1225*4882a593Smuzhiyun 	 * skip the subctxt allocation.
1226*4882a593Smuzhiyun 	 */
1227*4882a593Smuzhiyun 	if (uinfo->spu_subctxt_cnt <= 0)
1228*4882a593Smuzhiyun 		goto bail;
1229*4882a593Smuzhiyun 	num_subctxts = uinfo->spu_subctxt_cnt;
1230*4882a593Smuzhiyun 
1231*4882a593Smuzhiyun 	/* Check for subctxt compatibility */
1232*4882a593Smuzhiyun 	if (!qib_compatible_subctxts(uinfo->spu_userversion >> 16,
1233*4882a593Smuzhiyun 		uinfo->spu_userversion & 0xffff)) {
1234*4882a593Smuzhiyun 		qib_devinfo(dd->pcidev,
1235*4882a593Smuzhiyun 			 "Mismatched user version (%d.%d) and driver version (%d.%d) while context sharing. Ensure that driver and library are from the same release.\n",
1236*4882a593Smuzhiyun 			 (int) (uinfo->spu_userversion >> 16),
1237*4882a593Smuzhiyun 			 (int) (uinfo->spu_userversion & 0xffff),
1238*4882a593Smuzhiyun 			 QIB_USER_SWMAJOR, QIB_USER_SWMINOR);
1239*4882a593Smuzhiyun 		goto bail;
1240*4882a593Smuzhiyun 	}
1241*4882a593Smuzhiyun 	if (num_subctxts > QLOGIC_IB_MAX_SUBCTXT) {
1242*4882a593Smuzhiyun 		ret = -EINVAL;
1243*4882a593Smuzhiyun 		goto bail;
1244*4882a593Smuzhiyun 	}
1245*4882a593Smuzhiyun 
1246*4882a593Smuzhiyun 	rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts);
1247*4882a593Smuzhiyun 	if (!rcd->subctxt_uregbase) {
1248*4882a593Smuzhiyun 		ret = -ENOMEM;
1249*4882a593Smuzhiyun 		goto bail;
1250*4882a593Smuzhiyun 	}
1251*4882a593Smuzhiyun 	/* Note: rcd->rcvhdrq_size isn't initialized yet. */
1252*4882a593Smuzhiyun 	size = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
1253*4882a593Smuzhiyun 		     sizeof(u32), PAGE_SIZE) * num_subctxts;
1254*4882a593Smuzhiyun 	rcd->subctxt_rcvhdr_base = vmalloc_user(size);
1255*4882a593Smuzhiyun 	if (!rcd->subctxt_rcvhdr_base) {
1256*4882a593Smuzhiyun 		ret = -ENOMEM;
1257*4882a593Smuzhiyun 		goto bail_ureg;
1258*4882a593Smuzhiyun 	}
1259*4882a593Smuzhiyun 
1260*4882a593Smuzhiyun 	rcd->subctxt_rcvegrbuf = vmalloc_user(rcd->rcvegrbuf_chunks *
1261*4882a593Smuzhiyun 					      rcd->rcvegrbuf_size *
1262*4882a593Smuzhiyun 					      num_subctxts);
1263*4882a593Smuzhiyun 	if (!rcd->subctxt_rcvegrbuf) {
1264*4882a593Smuzhiyun 		ret = -ENOMEM;
1265*4882a593Smuzhiyun 		goto bail_rhdr;
1266*4882a593Smuzhiyun 	}
1267*4882a593Smuzhiyun 
1268*4882a593Smuzhiyun 	rcd->subctxt_cnt = uinfo->spu_subctxt_cnt;
1269*4882a593Smuzhiyun 	rcd->subctxt_id = uinfo->spu_subctxt_id;
1270*4882a593Smuzhiyun 	rcd->active_slaves = 1;
1271*4882a593Smuzhiyun 	rcd->redirect_seq_cnt = 1;
1272*4882a593Smuzhiyun 	set_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag);
1273*4882a593Smuzhiyun 	goto bail;
1274*4882a593Smuzhiyun 
1275*4882a593Smuzhiyun bail_rhdr:
1276*4882a593Smuzhiyun 	vfree(rcd->subctxt_rcvhdr_base);
1277*4882a593Smuzhiyun bail_ureg:
1278*4882a593Smuzhiyun 	vfree(rcd->subctxt_uregbase);
1279*4882a593Smuzhiyun 	rcd->subctxt_uregbase = NULL;
1280*4882a593Smuzhiyun bail:
1281*4882a593Smuzhiyun 	return ret;
1282*4882a593Smuzhiyun }
1283*4882a593Smuzhiyun 
setup_ctxt(struct qib_pportdata * ppd,int ctxt,struct file * fp,const struct qib_user_info * uinfo)1284*4882a593Smuzhiyun static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1285*4882a593Smuzhiyun 		      struct file *fp, const struct qib_user_info *uinfo)
1286*4882a593Smuzhiyun {
1287*4882a593Smuzhiyun 	struct qib_filedata *fd = fp->private_data;
1288*4882a593Smuzhiyun 	struct qib_devdata *dd = ppd->dd;
1289*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd;
1290*4882a593Smuzhiyun 	void *ptmp = NULL;
1291*4882a593Smuzhiyun 	int ret;
1292*4882a593Smuzhiyun 	int numa_id;
1293*4882a593Smuzhiyun 
1294*4882a593Smuzhiyun 	assign_ctxt_affinity(fp, dd);
1295*4882a593Smuzhiyun 
1296*4882a593Smuzhiyun 	numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
1297*4882a593Smuzhiyun 		cpu_to_node(fd->rec_cpu_num) :
1298*4882a593Smuzhiyun 		numa_node_id()) : dd->assigned_node_id;
1299*4882a593Smuzhiyun 
1300*4882a593Smuzhiyun 	rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
1301*4882a593Smuzhiyun 
1302*4882a593Smuzhiyun 	/*
1303*4882a593Smuzhiyun 	 * Allocate memory for use in qib_tid_update() at open to
1304*4882a593Smuzhiyun 	 * reduce cost of expected send setup per message segment
1305*4882a593Smuzhiyun 	 */
1306*4882a593Smuzhiyun 	if (rcd)
1307*4882a593Smuzhiyun 		ptmp = kmalloc(dd->rcvtidcnt * sizeof(u16) +
1308*4882a593Smuzhiyun 			       dd->rcvtidcnt * sizeof(struct page **),
1309*4882a593Smuzhiyun 			       GFP_KERNEL);
1310*4882a593Smuzhiyun 
1311*4882a593Smuzhiyun 	if (!rcd || !ptmp) {
1312*4882a593Smuzhiyun 		qib_dev_err(dd,
1313*4882a593Smuzhiyun 			"Unable to allocate ctxtdata memory, failing open\n");
1314*4882a593Smuzhiyun 		ret = -ENOMEM;
1315*4882a593Smuzhiyun 		goto bailerr;
1316*4882a593Smuzhiyun 	}
1317*4882a593Smuzhiyun 	rcd->userversion = uinfo->spu_userversion;
1318*4882a593Smuzhiyun 	ret = init_subctxts(dd, rcd, uinfo);
1319*4882a593Smuzhiyun 	if (ret)
1320*4882a593Smuzhiyun 		goto bailerr;
1321*4882a593Smuzhiyun 	rcd->tid_pg_list = ptmp;
1322*4882a593Smuzhiyun 	rcd->pid = current->pid;
1323*4882a593Smuzhiyun 	init_waitqueue_head(&dd->rcd[ctxt]->wait);
1324*4882a593Smuzhiyun 	strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
1325*4882a593Smuzhiyun 	ctxt_fp(fp) = rcd;
1326*4882a593Smuzhiyun 	qib_stats.sps_ctxts++;
1327*4882a593Smuzhiyun 	dd->freectxts--;
1328*4882a593Smuzhiyun 	ret = 0;
1329*4882a593Smuzhiyun 	goto bail;
1330*4882a593Smuzhiyun 
1331*4882a593Smuzhiyun bailerr:
1332*4882a593Smuzhiyun 	if (fd->rec_cpu_num != -1)
1333*4882a593Smuzhiyun 		__clear_bit(fd->rec_cpu_num, qib_cpulist);
1334*4882a593Smuzhiyun 
1335*4882a593Smuzhiyun 	dd->rcd[ctxt] = NULL;
1336*4882a593Smuzhiyun 	kfree(rcd);
1337*4882a593Smuzhiyun 	kfree(ptmp);
1338*4882a593Smuzhiyun bail:
1339*4882a593Smuzhiyun 	return ret;
1340*4882a593Smuzhiyun }
1341*4882a593Smuzhiyun 
usable(struct qib_pportdata * ppd)1342*4882a593Smuzhiyun static inline int usable(struct qib_pportdata *ppd)
1343*4882a593Smuzhiyun {
1344*4882a593Smuzhiyun 	struct qib_devdata *dd = ppd->dd;
1345*4882a593Smuzhiyun 
1346*4882a593Smuzhiyun 	return dd && (dd->flags & QIB_PRESENT) && dd->kregbase && ppd->lid &&
1347*4882a593Smuzhiyun 		(ppd->lflags & QIBL_LINKACTIVE);
1348*4882a593Smuzhiyun }
1349*4882a593Smuzhiyun 
1350*4882a593Smuzhiyun /*
1351*4882a593Smuzhiyun  * Select a context on the given device, either using a requested port
1352*4882a593Smuzhiyun  * or the port based on the context number.
1353*4882a593Smuzhiyun  */
choose_port_ctxt(struct file * fp,struct qib_devdata * dd,u32 port,const struct qib_user_info * uinfo)1354*4882a593Smuzhiyun static int choose_port_ctxt(struct file *fp, struct qib_devdata *dd, u32 port,
1355*4882a593Smuzhiyun 			    const struct qib_user_info *uinfo)
1356*4882a593Smuzhiyun {
1357*4882a593Smuzhiyun 	struct qib_pportdata *ppd = NULL;
1358*4882a593Smuzhiyun 	int ret, ctxt;
1359*4882a593Smuzhiyun 
1360*4882a593Smuzhiyun 	if (port) {
1361*4882a593Smuzhiyun 		if (!usable(dd->pport + port - 1)) {
1362*4882a593Smuzhiyun 			ret = -ENETDOWN;
1363*4882a593Smuzhiyun 			goto done;
1364*4882a593Smuzhiyun 		} else
1365*4882a593Smuzhiyun 			ppd = dd->pport + port - 1;
1366*4882a593Smuzhiyun 	}
1367*4882a593Smuzhiyun 	for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts && dd->rcd[ctxt];
1368*4882a593Smuzhiyun 	     ctxt++)
1369*4882a593Smuzhiyun 		;
1370*4882a593Smuzhiyun 	if (ctxt == dd->cfgctxts) {
1371*4882a593Smuzhiyun 		ret = -EBUSY;
1372*4882a593Smuzhiyun 		goto done;
1373*4882a593Smuzhiyun 	}
1374*4882a593Smuzhiyun 	if (!ppd) {
1375*4882a593Smuzhiyun 		u32 pidx = ctxt % dd->num_pports;
1376*4882a593Smuzhiyun 
1377*4882a593Smuzhiyun 		if (usable(dd->pport + pidx))
1378*4882a593Smuzhiyun 			ppd = dd->pport + pidx;
1379*4882a593Smuzhiyun 		else {
1380*4882a593Smuzhiyun 			for (pidx = 0; pidx < dd->num_pports && !ppd;
1381*4882a593Smuzhiyun 			     pidx++)
1382*4882a593Smuzhiyun 				if (usable(dd->pport + pidx))
1383*4882a593Smuzhiyun 					ppd = dd->pport + pidx;
1384*4882a593Smuzhiyun 		}
1385*4882a593Smuzhiyun 	}
1386*4882a593Smuzhiyun 	ret = ppd ? setup_ctxt(ppd, ctxt, fp, uinfo) : -ENETDOWN;
1387*4882a593Smuzhiyun done:
1388*4882a593Smuzhiyun 	return ret;
1389*4882a593Smuzhiyun }
1390*4882a593Smuzhiyun 
find_free_ctxt(int unit,struct file * fp,const struct qib_user_info * uinfo)1391*4882a593Smuzhiyun static int find_free_ctxt(int unit, struct file *fp,
1392*4882a593Smuzhiyun 			  const struct qib_user_info *uinfo)
1393*4882a593Smuzhiyun {
1394*4882a593Smuzhiyun 	struct qib_devdata *dd = qib_lookup(unit);
1395*4882a593Smuzhiyun 	int ret;
1396*4882a593Smuzhiyun 
1397*4882a593Smuzhiyun 	if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports))
1398*4882a593Smuzhiyun 		ret = -ENODEV;
1399*4882a593Smuzhiyun 	else
1400*4882a593Smuzhiyun 		ret = choose_port_ctxt(fp, dd, uinfo->spu_port, uinfo);
1401*4882a593Smuzhiyun 
1402*4882a593Smuzhiyun 	return ret;
1403*4882a593Smuzhiyun }
1404*4882a593Smuzhiyun 
get_a_ctxt(struct file * fp,const struct qib_user_info * uinfo,unsigned alg)1405*4882a593Smuzhiyun static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
1406*4882a593Smuzhiyun 		      unsigned alg)
1407*4882a593Smuzhiyun {
1408*4882a593Smuzhiyun 	struct qib_devdata *udd = NULL;
1409*4882a593Smuzhiyun 	int ret = 0, devmax, npresent, nup, ndev, dusable = 0, i;
1410*4882a593Smuzhiyun 	u32 port = uinfo->spu_port, ctxt;
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun 	devmax = qib_count_units(&npresent, &nup);
1413*4882a593Smuzhiyun 	if (!npresent) {
1414*4882a593Smuzhiyun 		ret = -ENXIO;
1415*4882a593Smuzhiyun 		goto done;
1416*4882a593Smuzhiyun 	}
1417*4882a593Smuzhiyun 	if (nup == 0) {
1418*4882a593Smuzhiyun 		ret = -ENETDOWN;
1419*4882a593Smuzhiyun 		goto done;
1420*4882a593Smuzhiyun 	}
1421*4882a593Smuzhiyun 
1422*4882a593Smuzhiyun 	if (alg == QIB_PORT_ALG_ACROSS) {
1423*4882a593Smuzhiyun 		unsigned inuse = ~0U;
1424*4882a593Smuzhiyun 
1425*4882a593Smuzhiyun 		/* find device (with ACTIVE ports) with fewest ctxts in use */
1426*4882a593Smuzhiyun 		for (ndev = 0; ndev < devmax; ndev++) {
1427*4882a593Smuzhiyun 			struct qib_devdata *dd = qib_lookup(ndev);
1428*4882a593Smuzhiyun 			unsigned cused = 0, cfree = 0, pusable = 0;
1429*4882a593Smuzhiyun 
1430*4882a593Smuzhiyun 			if (!dd)
1431*4882a593Smuzhiyun 				continue;
1432*4882a593Smuzhiyun 			if (port && port <= dd->num_pports &&
1433*4882a593Smuzhiyun 			    usable(dd->pport + port - 1))
1434*4882a593Smuzhiyun 				pusable = 1;
1435*4882a593Smuzhiyun 			else
1436*4882a593Smuzhiyun 				for (i = 0; i < dd->num_pports; i++)
1437*4882a593Smuzhiyun 					if (usable(dd->pport + i))
1438*4882a593Smuzhiyun 						pusable++;
1439*4882a593Smuzhiyun 			if (!pusable)
1440*4882a593Smuzhiyun 				continue;
1441*4882a593Smuzhiyun 			for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts;
1442*4882a593Smuzhiyun 			     ctxt++)
1443*4882a593Smuzhiyun 				if (dd->rcd[ctxt])
1444*4882a593Smuzhiyun 					cused++;
1445*4882a593Smuzhiyun 				else
1446*4882a593Smuzhiyun 					cfree++;
1447*4882a593Smuzhiyun 			if (cfree && cused < inuse) {
1448*4882a593Smuzhiyun 				udd = dd;
1449*4882a593Smuzhiyun 				inuse = cused;
1450*4882a593Smuzhiyun 			}
1451*4882a593Smuzhiyun 		}
1452*4882a593Smuzhiyun 		if (udd) {
1453*4882a593Smuzhiyun 			ret = choose_port_ctxt(fp, udd, port, uinfo);
1454*4882a593Smuzhiyun 			goto done;
1455*4882a593Smuzhiyun 		}
1456*4882a593Smuzhiyun 	} else {
1457*4882a593Smuzhiyun 		for (ndev = 0; ndev < devmax; ndev++) {
1458*4882a593Smuzhiyun 			struct qib_devdata *dd = qib_lookup(ndev);
1459*4882a593Smuzhiyun 
1460*4882a593Smuzhiyun 			if (dd) {
1461*4882a593Smuzhiyun 				ret = choose_port_ctxt(fp, dd, port, uinfo);
1462*4882a593Smuzhiyun 				if (!ret)
1463*4882a593Smuzhiyun 					goto done;
1464*4882a593Smuzhiyun 				if (ret == -EBUSY)
1465*4882a593Smuzhiyun 					dusable++;
1466*4882a593Smuzhiyun 			}
1467*4882a593Smuzhiyun 		}
1468*4882a593Smuzhiyun 	}
1469*4882a593Smuzhiyun 	ret = dusable ? -EBUSY : -ENETDOWN;
1470*4882a593Smuzhiyun 
1471*4882a593Smuzhiyun done:
1472*4882a593Smuzhiyun 	return ret;
1473*4882a593Smuzhiyun }
1474*4882a593Smuzhiyun 
find_shared_ctxt(struct file * fp,const struct qib_user_info * uinfo)1475*4882a593Smuzhiyun static int find_shared_ctxt(struct file *fp,
1476*4882a593Smuzhiyun 			    const struct qib_user_info *uinfo)
1477*4882a593Smuzhiyun {
1478*4882a593Smuzhiyun 	int devmax, ndev, i;
1479*4882a593Smuzhiyun 	int ret = 0;
1480*4882a593Smuzhiyun 
1481*4882a593Smuzhiyun 	devmax = qib_count_units(NULL, NULL);
1482*4882a593Smuzhiyun 
1483*4882a593Smuzhiyun 	for (ndev = 0; ndev < devmax; ndev++) {
1484*4882a593Smuzhiyun 		struct qib_devdata *dd = qib_lookup(ndev);
1485*4882a593Smuzhiyun 
1486*4882a593Smuzhiyun 		/* device portion of usable() */
1487*4882a593Smuzhiyun 		if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase))
1488*4882a593Smuzhiyun 			continue;
1489*4882a593Smuzhiyun 		for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
1490*4882a593Smuzhiyun 			struct qib_ctxtdata *rcd = dd->rcd[i];
1491*4882a593Smuzhiyun 
1492*4882a593Smuzhiyun 			/* Skip ctxts which are not yet open */
1493*4882a593Smuzhiyun 			if (!rcd || !rcd->cnt)
1494*4882a593Smuzhiyun 				continue;
1495*4882a593Smuzhiyun 			/* Skip ctxt if it doesn't match the requested one */
1496*4882a593Smuzhiyun 			if (rcd->subctxt_id != uinfo->spu_subctxt_id)
1497*4882a593Smuzhiyun 				continue;
1498*4882a593Smuzhiyun 			/* Verify the sharing process matches the master */
1499*4882a593Smuzhiyun 			if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
1500*4882a593Smuzhiyun 			    rcd->userversion != uinfo->spu_userversion ||
1501*4882a593Smuzhiyun 			    rcd->cnt >= rcd->subctxt_cnt) {
1502*4882a593Smuzhiyun 				ret = -EINVAL;
1503*4882a593Smuzhiyun 				goto done;
1504*4882a593Smuzhiyun 			}
1505*4882a593Smuzhiyun 			ctxt_fp(fp) = rcd;
1506*4882a593Smuzhiyun 			subctxt_fp(fp) = rcd->cnt++;
1507*4882a593Smuzhiyun 			rcd->subpid[subctxt_fp(fp)] = current->pid;
1508*4882a593Smuzhiyun 			tidcursor_fp(fp) = 0;
1509*4882a593Smuzhiyun 			rcd->active_slaves |= 1 << subctxt_fp(fp);
1510*4882a593Smuzhiyun 			ret = 1;
1511*4882a593Smuzhiyun 			goto done;
1512*4882a593Smuzhiyun 		}
1513*4882a593Smuzhiyun 	}
1514*4882a593Smuzhiyun 
1515*4882a593Smuzhiyun done:
1516*4882a593Smuzhiyun 	return ret;
1517*4882a593Smuzhiyun }
1518*4882a593Smuzhiyun 
qib_open(struct inode * in,struct file * fp)1519*4882a593Smuzhiyun static int qib_open(struct inode *in, struct file *fp)
1520*4882a593Smuzhiyun {
1521*4882a593Smuzhiyun 	/* The real work is performed later in qib_assign_ctxt() */
1522*4882a593Smuzhiyun 	fp->private_data = kzalloc(sizeof(struct qib_filedata), GFP_KERNEL);
1523*4882a593Smuzhiyun 	if (fp->private_data) /* no cpu affinity by default */
1524*4882a593Smuzhiyun 		((struct qib_filedata *)fp->private_data)->rec_cpu_num = -1;
1525*4882a593Smuzhiyun 	return fp->private_data ? 0 : -ENOMEM;
1526*4882a593Smuzhiyun }
1527*4882a593Smuzhiyun 
find_hca(unsigned int cpu,int * unit)1528*4882a593Smuzhiyun static int find_hca(unsigned int cpu, int *unit)
1529*4882a593Smuzhiyun {
1530*4882a593Smuzhiyun 	int ret = 0, devmax, npresent, nup, ndev;
1531*4882a593Smuzhiyun 
1532*4882a593Smuzhiyun 	*unit = -1;
1533*4882a593Smuzhiyun 
1534*4882a593Smuzhiyun 	devmax = qib_count_units(&npresent, &nup);
1535*4882a593Smuzhiyun 	if (!npresent) {
1536*4882a593Smuzhiyun 		ret = -ENXIO;
1537*4882a593Smuzhiyun 		goto done;
1538*4882a593Smuzhiyun 	}
1539*4882a593Smuzhiyun 	if (!nup) {
1540*4882a593Smuzhiyun 		ret = -ENETDOWN;
1541*4882a593Smuzhiyun 		goto done;
1542*4882a593Smuzhiyun 	}
1543*4882a593Smuzhiyun 	for (ndev = 0; ndev < devmax; ndev++) {
1544*4882a593Smuzhiyun 		struct qib_devdata *dd = qib_lookup(ndev);
1545*4882a593Smuzhiyun 
1546*4882a593Smuzhiyun 		if (dd) {
1547*4882a593Smuzhiyun 			if (pcibus_to_node(dd->pcidev->bus) < 0) {
1548*4882a593Smuzhiyun 				ret = -EINVAL;
1549*4882a593Smuzhiyun 				goto done;
1550*4882a593Smuzhiyun 			}
1551*4882a593Smuzhiyun 			if (cpu_to_node(cpu) ==
1552*4882a593Smuzhiyun 				pcibus_to_node(dd->pcidev->bus)) {
1553*4882a593Smuzhiyun 				*unit = ndev;
1554*4882a593Smuzhiyun 				goto done;
1555*4882a593Smuzhiyun 			}
1556*4882a593Smuzhiyun 		}
1557*4882a593Smuzhiyun 	}
1558*4882a593Smuzhiyun done:
1559*4882a593Smuzhiyun 	return ret;
1560*4882a593Smuzhiyun }
1561*4882a593Smuzhiyun 
do_qib_user_sdma_queue_create(struct file * fp)1562*4882a593Smuzhiyun static int do_qib_user_sdma_queue_create(struct file *fp)
1563*4882a593Smuzhiyun {
1564*4882a593Smuzhiyun 	struct qib_filedata *fd = fp->private_data;
1565*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd = fd->rcd;
1566*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
1567*4882a593Smuzhiyun 
1568*4882a593Smuzhiyun 	if (dd->flags & QIB_HAS_SEND_DMA) {
1569*4882a593Smuzhiyun 
1570*4882a593Smuzhiyun 		fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
1571*4882a593Smuzhiyun 						    dd->unit,
1572*4882a593Smuzhiyun 						    rcd->ctxt,
1573*4882a593Smuzhiyun 						    fd->subctxt);
1574*4882a593Smuzhiyun 		if (!fd->pq)
1575*4882a593Smuzhiyun 			return -ENOMEM;
1576*4882a593Smuzhiyun 	}
1577*4882a593Smuzhiyun 
1578*4882a593Smuzhiyun 	return 0;
1579*4882a593Smuzhiyun }
1580*4882a593Smuzhiyun 
1581*4882a593Smuzhiyun /*
1582*4882a593Smuzhiyun  * Get ctxt early, so can set affinity prior to memory allocation.
1583*4882a593Smuzhiyun  */
qib_assign_ctxt(struct file * fp,const struct qib_user_info * uinfo)1584*4882a593Smuzhiyun static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
1585*4882a593Smuzhiyun {
1586*4882a593Smuzhiyun 	int ret;
1587*4882a593Smuzhiyun 	int i_minor;
1588*4882a593Smuzhiyun 	unsigned swmajor, swminor, alg = QIB_PORT_ALG_ACROSS;
1589*4882a593Smuzhiyun 
1590*4882a593Smuzhiyun 	/* Check to be sure we haven't already initialized this file */
1591*4882a593Smuzhiyun 	if (ctxt_fp(fp)) {
1592*4882a593Smuzhiyun 		ret = -EINVAL;
1593*4882a593Smuzhiyun 		goto done;
1594*4882a593Smuzhiyun 	}
1595*4882a593Smuzhiyun 
1596*4882a593Smuzhiyun 	/* for now, if major version is different, bail */
1597*4882a593Smuzhiyun 	swmajor = uinfo->spu_userversion >> 16;
1598*4882a593Smuzhiyun 	if (swmajor != QIB_USER_SWMAJOR) {
1599*4882a593Smuzhiyun 		ret = -ENODEV;
1600*4882a593Smuzhiyun 		goto done;
1601*4882a593Smuzhiyun 	}
1602*4882a593Smuzhiyun 
1603*4882a593Smuzhiyun 	swminor = uinfo->spu_userversion & 0xffff;
1604*4882a593Smuzhiyun 
1605*4882a593Smuzhiyun 	if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
1606*4882a593Smuzhiyun 		alg = uinfo->spu_port_alg;
1607*4882a593Smuzhiyun 
1608*4882a593Smuzhiyun 	mutex_lock(&qib_mutex);
1609*4882a593Smuzhiyun 
1610*4882a593Smuzhiyun 	if (qib_compatible_subctxts(swmajor, swminor) &&
1611*4882a593Smuzhiyun 	    uinfo->spu_subctxt_cnt) {
1612*4882a593Smuzhiyun 		ret = find_shared_ctxt(fp, uinfo);
1613*4882a593Smuzhiyun 		if (ret > 0) {
1614*4882a593Smuzhiyun 			ret = do_qib_user_sdma_queue_create(fp);
1615*4882a593Smuzhiyun 			if (!ret)
1616*4882a593Smuzhiyun 				assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
1617*4882a593Smuzhiyun 			goto done_ok;
1618*4882a593Smuzhiyun 		}
1619*4882a593Smuzhiyun 	}
1620*4882a593Smuzhiyun 
1621*4882a593Smuzhiyun 	i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
1622*4882a593Smuzhiyun 	if (i_minor)
1623*4882a593Smuzhiyun 		ret = find_free_ctxt(i_minor - 1, fp, uinfo);
1624*4882a593Smuzhiyun 	else {
1625*4882a593Smuzhiyun 		int unit;
1626*4882a593Smuzhiyun 		const unsigned int cpu = cpumask_first(current->cpus_ptr);
1627*4882a593Smuzhiyun 		const unsigned int weight = current->nr_cpus_allowed;
1628*4882a593Smuzhiyun 
1629*4882a593Smuzhiyun 		if (weight == 1 && !test_bit(cpu, qib_cpulist))
1630*4882a593Smuzhiyun 			if (!find_hca(cpu, &unit) && unit >= 0)
1631*4882a593Smuzhiyun 				if (!find_free_ctxt(unit, fp, uinfo)) {
1632*4882a593Smuzhiyun 					ret = 0;
1633*4882a593Smuzhiyun 					goto done_chk_sdma;
1634*4882a593Smuzhiyun 				}
1635*4882a593Smuzhiyun 		ret = get_a_ctxt(fp, uinfo, alg);
1636*4882a593Smuzhiyun 	}
1637*4882a593Smuzhiyun 
1638*4882a593Smuzhiyun done_chk_sdma:
1639*4882a593Smuzhiyun 	if (!ret)
1640*4882a593Smuzhiyun 		ret = do_qib_user_sdma_queue_create(fp);
1641*4882a593Smuzhiyun done_ok:
1642*4882a593Smuzhiyun 	mutex_unlock(&qib_mutex);
1643*4882a593Smuzhiyun 
1644*4882a593Smuzhiyun done:
1645*4882a593Smuzhiyun 	return ret;
1646*4882a593Smuzhiyun }
1647*4882a593Smuzhiyun 
1648*4882a593Smuzhiyun 
qib_do_user_init(struct file * fp,const struct qib_user_info * uinfo)1649*4882a593Smuzhiyun static int qib_do_user_init(struct file *fp,
1650*4882a593Smuzhiyun 			    const struct qib_user_info *uinfo)
1651*4882a593Smuzhiyun {
1652*4882a593Smuzhiyun 	int ret;
1653*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd = ctxt_fp(fp);
1654*4882a593Smuzhiyun 	struct qib_devdata *dd;
1655*4882a593Smuzhiyun 	unsigned uctxt;
1656*4882a593Smuzhiyun 
1657*4882a593Smuzhiyun 	/* Subctxts don't need to initialize anything since master did it. */
1658*4882a593Smuzhiyun 	if (subctxt_fp(fp)) {
1659*4882a593Smuzhiyun 		ret = wait_event_interruptible(rcd->wait,
1660*4882a593Smuzhiyun 			!test_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag));
1661*4882a593Smuzhiyun 		goto bail;
1662*4882a593Smuzhiyun 	}
1663*4882a593Smuzhiyun 
1664*4882a593Smuzhiyun 	dd = rcd->dd;
1665*4882a593Smuzhiyun 
1666*4882a593Smuzhiyun 	/* some ctxts may get extra buffers, calculate that here */
1667*4882a593Smuzhiyun 	uctxt = rcd->ctxt - dd->first_user_ctxt;
1668*4882a593Smuzhiyun 	if (uctxt < dd->ctxts_extrabuf) {
1669*4882a593Smuzhiyun 		rcd->piocnt = dd->pbufsctxt + 1;
1670*4882a593Smuzhiyun 		rcd->pio_base = rcd->piocnt * uctxt;
1671*4882a593Smuzhiyun 	} else {
1672*4882a593Smuzhiyun 		rcd->piocnt = dd->pbufsctxt;
1673*4882a593Smuzhiyun 		rcd->pio_base = rcd->piocnt * uctxt +
1674*4882a593Smuzhiyun 			dd->ctxts_extrabuf;
1675*4882a593Smuzhiyun 	}
1676*4882a593Smuzhiyun 
1677*4882a593Smuzhiyun 	/*
1678*4882a593Smuzhiyun 	 * All user buffers are 2KB buffers.  If we ever support
1679*4882a593Smuzhiyun 	 * giving 4KB buffers to user processes, this will need some
1680*4882a593Smuzhiyun 	 * work.  Can't use piobufbase directly, because it has
1681*4882a593Smuzhiyun 	 * both 2K and 4K buffer base values.  So check and handle.
1682*4882a593Smuzhiyun 	 */
1683*4882a593Smuzhiyun 	if ((rcd->pio_base + rcd->piocnt) > dd->piobcnt2k) {
1684*4882a593Smuzhiyun 		if (rcd->pio_base >= dd->piobcnt2k) {
1685*4882a593Smuzhiyun 			qib_dev_err(dd,
1686*4882a593Smuzhiyun 				    "%u:ctxt%u: no 2KB buffers available\n",
1687*4882a593Smuzhiyun 				    dd->unit, rcd->ctxt);
1688*4882a593Smuzhiyun 			ret = -ENOBUFS;
1689*4882a593Smuzhiyun 			goto bail;
1690*4882a593Smuzhiyun 		}
1691*4882a593Smuzhiyun 		rcd->piocnt = dd->piobcnt2k - rcd->pio_base;
1692*4882a593Smuzhiyun 		qib_dev_err(dd, "Ctxt%u: would use 4KB bufs, using %u\n",
1693*4882a593Smuzhiyun 			    rcd->ctxt, rcd->piocnt);
1694*4882a593Smuzhiyun 	}
1695*4882a593Smuzhiyun 
1696*4882a593Smuzhiyun 	rcd->piobufs = dd->pio2k_bufbase + rcd->pio_base * dd->palign;
1697*4882a593Smuzhiyun 	qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt,
1698*4882a593Smuzhiyun 			       TXCHK_CHG_TYPE_USER, rcd);
1699*4882a593Smuzhiyun 	/*
1700*4882a593Smuzhiyun 	 * try to ensure that processes start up with consistent avail update
1701*4882a593Smuzhiyun 	 * for their own range, at least.   If system very quiet, it might
1702*4882a593Smuzhiyun 	 * have the in-memory copy out of date at startup for this range of
1703*4882a593Smuzhiyun 	 * buffers, when a context gets re-used.  Do after the chg_pioavail
1704*4882a593Smuzhiyun 	 * and before the rest of setup, so it's "almost certain" the dma
1705*4882a593Smuzhiyun 	 * will have occurred (can't 100% guarantee, but should be many
1706*4882a593Smuzhiyun 	 * decimals of 9s, with this ordering), given how much else happens
1707*4882a593Smuzhiyun 	 * after this.
1708*4882a593Smuzhiyun 	 */
1709*4882a593Smuzhiyun 	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
1710*4882a593Smuzhiyun 
1711*4882a593Smuzhiyun 	/*
1712*4882a593Smuzhiyun 	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
1713*4882a593Smuzhiyun 	 * array for time being.  If rcd->ctxt > chip-supported,
1714*4882a593Smuzhiyun 	 * we need to do extra stuff here to handle by handling overflow
1715*4882a593Smuzhiyun 	 * through ctxt 0, someday
1716*4882a593Smuzhiyun 	 */
1717*4882a593Smuzhiyun 	ret = qib_create_rcvhdrq(dd, rcd);
1718*4882a593Smuzhiyun 	if (!ret)
1719*4882a593Smuzhiyun 		ret = qib_setup_eagerbufs(rcd);
1720*4882a593Smuzhiyun 	if (ret)
1721*4882a593Smuzhiyun 		goto bail_pio;
1722*4882a593Smuzhiyun 
1723*4882a593Smuzhiyun 	rcd->tidcursor = 0; /* start at beginning after open */
1724*4882a593Smuzhiyun 
1725*4882a593Smuzhiyun 	/* initialize poll variables... */
1726*4882a593Smuzhiyun 	rcd->urgent = 0;
1727*4882a593Smuzhiyun 	rcd->urgent_poll = 0;
1728*4882a593Smuzhiyun 
1729*4882a593Smuzhiyun 	/*
1730*4882a593Smuzhiyun 	 * Now enable the ctxt for receive.
1731*4882a593Smuzhiyun 	 * For chips that are set to DMA the tail register to memory
1732*4882a593Smuzhiyun 	 * when they change (and when the update bit transitions from
1733*4882a593Smuzhiyun 	 * 0 to 1.  So for those chips, we turn it off and then back on.
1734*4882a593Smuzhiyun 	 * This will (very briefly) affect any other open ctxts, but the
1735*4882a593Smuzhiyun 	 * duration is very short, and therefore isn't an issue.  We
1736*4882a593Smuzhiyun 	 * explicitly set the in-memory tail copy to 0 beforehand, so we
1737*4882a593Smuzhiyun 	 * don't have to wait to be sure the DMA update has happened
1738*4882a593Smuzhiyun 	 * (chip resets head/tail to 0 on transition to enable).
1739*4882a593Smuzhiyun 	 */
1740*4882a593Smuzhiyun 	if (rcd->rcvhdrtail_kvaddr)
1741*4882a593Smuzhiyun 		qib_clear_rcvhdrtail(rcd);
1742*4882a593Smuzhiyun 
1743*4882a593Smuzhiyun 	dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_TIDFLOW_ENB,
1744*4882a593Smuzhiyun 		      rcd->ctxt);
1745*4882a593Smuzhiyun 
1746*4882a593Smuzhiyun 	/* Notify any waiting slaves */
1747*4882a593Smuzhiyun 	if (rcd->subctxt_cnt) {
1748*4882a593Smuzhiyun 		clear_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag);
1749*4882a593Smuzhiyun 		wake_up(&rcd->wait);
1750*4882a593Smuzhiyun 	}
1751*4882a593Smuzhiyun 	return 0;
1752*4882a593Smuzhiyun 
1753*4882a593Smuzhiyun bail_pio:
1754*4882a593Smuzhiyun 	qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt,
1755*4882a593Smuzhiyun 			       TXCHK_CHG_TYPE_KERN, rcd);
1756*4882a593Smuzhiyun bail:
1757*4882a593Smuzhiyun 	return ret;
1758*4882a593Smuzhiyun }
1759*4882a593Smuzhiyun 
1760*4882a593Smuzhiyun /**
1761*4882a593Smuzhiyun  * unlock_exptid - unlock any expected TID entries context still had in use
1762*4882a593Smuzhiyun  * @rcd: ctxt
1763*4882a593Smuzhiyun  *
1764*4882a593Smuzhiyun  * We don't actually update the chip here, because we do a bulk update
1765*4882a593Smuzhiyun  * below, using f_clear_tids.
1766*4882a593Smuzhiyun  */
unlock_expected_tids(struct qib_ctxtdata * rcd)1767*4882a593Smuzhiyun static void unlock_expected_tids(struct qib_ctxtdata *rcd)
1768*4882a593Smuzhiyun {
1769*4882a593Smuzhiyun 	struct qib_devdata *dd = rcd->dd;
1770*4882a593Smuzhiyun 	int ctxt_tidbase = rcd->ctxt * dd->rcvtidcnt;
1771*4882a593Smuzhiyun 	int i, cnt = 0, maxtid = ctxt_tidbase + dd->rcvtidcnt;
1772*4882a593Smuzhiyun 
1773*4882a593Smuzhiyun 	for (i = ctxt_tidbase; i < maxtid; i++) {
1774*4882a593Smuzhiyun 		struct page *p = dd->pageshadow[i];
1775*4882a593Smuzhiyun 		dma_addr_t phys;
1776*4882a593Smuzhiyun 
1777*4882a593Smuzhiyun 		if (!p)
1778*4882a593Smuzhiyun 			continue;
1779*4882a593Smuzhiyun 
1780*4882a593Smuzhiyun 		phys = dd->physshadow[i];
1781*4882a593Smuzhiyun 		dd->physshadow[i] = dd->tidinvalid;
1782*4882a593Smuzhiyun 		dd->pageshadow[i] = NULL;
1783*4882a593Smuzhiyun 		pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
1784*4882a593Smuzhiyun 			       PCI_DMA_FROMDEVICE);
1785*4882a593Smuzhiyun 		qib_release_user_pages(&p, 1);
1786*4882a593Smuzhiyun 		cnt++;
1787*4882a593Smuzhiyun 	}
1788*4882a593Smuzhiyun }
1789*4882a593Smuzhiyun 
qib_close(struct inode * in,struct file * fp)1790*4882a593Smuzhiyun static int qib_close(struct inode *in, struct file *fp)
1791*4882a593Smuzhiyun {
1792*4882a593Smuzhiyun 	struct qib_filedata *fd;
1793*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd;
1794*4882a593Smuzhiyun 	struct qib_devdata *dd;
1795*4882a593Smuzhiyun 	unsigned long flags;
1796*4882a593Smuzhiyun 	unsigned ctxt;
1797*4882a593Smuzhiyun 
1798*4882a593Smuzhiyun 	mutex_lock(&qib_mutex);
1799*4882a593Smuzhiyun 
1800*4882a593Smuzhiyun 	fd = fp->private_data;
1801*4882a593Smuzhiyun 	fp->private_data = NULL;
1802*4882a593Smuzhiyun 	rcd = fd->rcd;
1803*4882a593Smuzhiyun 	if (!rcd) {
1804*4882a593Smuzhiyun 		mutex_unlock(&qib_mutex);
1805*4882a593Smuzhiyun 		goto bail;
1806*4882a593Smuzhiyun 	}
1807*4882a593Smuzhiyun 
1808*4882a593Smuzhiyun 	dd = rcd->dd;
1809*4882a593Smuzhiyun 
1810*4882a593Smuzhiyun 	/* ensure all pio buffer writes in progress are flushed */
1811*4882a593Smuzhiyun 	qib_flush_wc();
1812*4882a593Smuzhiyun 
1813*4882a593Smuzhiyun 	/* drain user sdma queue */
1814*4882a593Smuzhiyun 	if (fd->pq) {
1815*4882a593Smuzhiyun 		qib_user_sdma_queue_drain(rcd->ppd, fd->pq);
1816*4882a593Smuzhiyun 		qib_user_sdma_queue_destroy(fd->pq);
1817*4882a593Smuzhiyun 	}
1818*4882a593Smuzhiyun 
1819*4882a593Smuzhiyun 	if (fd->rec_cpu_num != -1)
1820*4882a593Smuzhiyun 		__clear_bit(fd->rec_cpu_num, qib_cpulist);
1821*4882a593Smuzhiyun 
1822*4882a593Smuzhiyun 	if (--rcd->cnt) {
1823*4882a593Smuzhiyun 		/*
1824*4882a593Smuzhiyun 		 * XXX If the master closes the context before the slave(s),
1825*4882a593Smuzhiyun 		 * revoke the mmap for the eager receive queue so
1826*4882a593Smuzhiyun 		 * the slave(s) don't wait for receive data forever.
1827*4882a593Smuzhiyun 		 */
1828*4882a593Smuzhiyun 		rcd->active_slaves &= ~(1 << fd->subctxt);
1829*4882a593Smuzhiyun 		rcd->subpid[fd->subctxt] = 0;
1830*4882a593Smuzhiyun 		mutex_unlock(&qib_mutex);
1831*4882a593Smuzhiyun 		goto bail;
1832*4882a593Smuzhiyun 	}
1833*4882a593Smuzhiyun 
1834*4882a593Smuzhiyun 	/* early; no interrupt users after this */
1835*4882a593Smuzhiyun 	spin_lock_irqsave(&dd->uctxt_lock, flags);
1836*4882a593Smuzhiyun 	ctxt = rcd->ctxt;
1837*4882a593Smuzhiyun 	dd->rcd[ctxt] = NULL;
1838*4882a593Smuzhiyun 	rcd->pid = 0;
1839*4882a593Smuzhiyun 	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
1840*4882a593Smuzhiyun 
1841*4882a593Smuzhiyun 	if (rcd->rcvwait_to || rcd->piowait_to ||
1842*4882a593Smuzhiyun 	    rcd->rcvnowait || rcd->pionowait) {
1843*4882a593Smuzhiyun 		rcd->rcvwait_to = 0;
1844*4882a593Smuzhiyun 		rcd->piowait_to = 0;
1845*4882a593Smuzhiyun 		rcd->rcvnowait = 0;
1846*4882a593Smuzhiyun 		rcd->pionowait = 0;
1847*4882a593Smuzhiyun 	}
1848*4882a593Smuzhiyun 	if (rcd->flag)
1849*4882a593Smuzhiyun 		rcd->flag = 0;
1850*4882a593Smuzhiyun 
1851*4882a593Smuzhiyun 	if (dd->kregbase) {
1852*4882a593Smuzhiyun 		/* atomically clear receive enable ctxt and intr avail. */
1853*4882a593Smuzhiyun 		dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_DIS |
1854*4882a593Smuzhiyun 				  QIB_RCVCTRL_INTRAVAIL_DIS, ctxt);
1855*4882a593Smuzhiyun 
1856*4882a593Smuzhiyun 		/* clean up the pkeys for this ctxt user */
1857*4882a593Smuzhiyun 		qib_clean_part_key(rcd, dd);
1858*4882a593Smuzhiyun 		qib_disarm_piobufs(dd, rcd->pio_base, rcd->piocnt);
1859*4882a593Smuzhiyun 		qib_chg_pioavailkernel(dd, rcd->pio_base,
1860*4882a593Smuzhiyun 				       rcd->piocnt, TXCHK_CHG_TYPE_KERN, NULL);
1861*4882a593Smuzhiyun 
1862*4882a593Smuzhiyun 		dd->f_clear_tids(dd, rcd);
1863*4882a593Smuzhiyun 
1864*4882a593Smuzhiyun 		if (dd->pageshadow)
1865*4882a593Smuzhiyun 			unlock_expected_tids(rcd);
1866*4882a593Smuzhiyun 		qib_stats.sps_ctxts--;
1867*4882a593Smuzhiyun 		dd->freectxts++;
1868*4882a593Smuzhiyun 	}
1869*4882a593Smuzhiyun 
1870*4882a593Smuzhiyun 	mutex_unlock(&qib_mutex);
1871*4882a593Smuzhiyun 	qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */
1872*4882a593Smuzhiyun 
1873*4882a593Smuzhiyun bail:
1874*4882a593Smuzhiyun 	kfree(fd);
1875*4882a593Smuzhiyun 	return 0;
1876*4882a593Smuzhiyun }
1877*4882a593Smuzhiyun 
qib_ctxt_info(struct file * fp,struct qib_ctxt_info __user * uinfo)1878*4882a593Smuzhiyun static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo)
1879*4882a593Smuzhiyun {
1880*4882a593Smuzhiyun 	struct qib_ctxt_info info;
1881*4882a593Smuzhiyun 	int ret;
1882*4882a593Smuzhiyun 	size_t sz;
1883*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd = ctxt_fp(fp);
1884*4882a593Smuzhiyun 	struct qib_filedata *fd;
1885*4882a593Smuzhiyun 
1886*4882a593Smuzhiyun 	fd = fp->private_data;
1887*4882a593Smuzhiyun 
1888*4882a593Smuzhiyun 	info.num_active = qib_count_active_units();
1889*4882a593Smuzhiyun 	info.unit = rcd->dd->unit;
1890*4882a593Smuzhiyun 	info.port = rcd->ppd->port;
1891*4882a593Smuzhiyun 	info.ctxt = rcd->ctxt;
1892*4882a593Smuzhiyun 	info.subctxt =  subctxt_fp(fp);
1893*4882a593Smuzhiyun 	/* Number of user ctxts available for this device. */
1894*4882a593Smuzhiyun 	info.num_ctxts = rcd->dd->cfgctxts - rcd->dd->first_user_ctxt;
1895*4882a593Smuzhiyun 	info.num_subctxts = rcd->subctxt_cnt;
1896*4882a593Smuzhiyun 	info.rec_cpu = fd->rec_cpu_num;
1897*4882a593Smuzhiyun 	sz = sizeof(info);
1898*4882a593Smuzhiyun 
1899*4882a593Smuzhiyun 	if (copy_to_user(uinfo, &info, sz)) {
1900*4882a593Smuzhiyun 		ret = -EFAULT;
1901*4882a593Smuzhiyun 		goto bail;
1902*4882a593Smuzhiyun 	}
1903*4882a593Smuzhiyun 	ret = 0;
1904*4882a593Smuzhiyun 
1905*4882a593Smuzhiyun bail:
1906*4882a593Smuzhiyun 	return ret;
1907*4882a593Smuzhiyun }
1908*4882a593Smuzhiyun 
qib_sdma_get_inflight(struct qib_user_sdma_queue * pq,u32 __user * inflightp)1909*4882a593Smuzhiyun static int qib_sdma_get_inflight(struct qib_user_sdma_queue *pq,
1910*4882a593Smuzhiyun 				 u32 __user *inflightp)
1911*4882a593Smuzhiyun {
1912*4882a593Smuzhiyun 	const u32 val = qib_user_sdma_inflight_counter(pq);
1913*4882a593Smuzhiyun 
1914*4882a593Smuzhiyun 	if (put_user(val, inflightp))
1915*4882a593Smuzhiyun 		return -EFAULT;
1916*4882a593Smuzhiyun 
1917*4882a593Smuzhiyun 	return 0;
1918*4882a593Smuzhiyun }
1919*4882a593Smuzhiyun 
qib_sdma_get_complete(struct qib_pportdata * ppd,struct qib_user_sdma_queue * pq,u32 __user * completep)1920*4882a593Smuzhiyun static int qib_sdma_get_complete(struct qib_pportdata *ppd,
1921*4882a593Smuzhiyun 				 struct qib_user_sdma_queue *pq,
1922*4882a593Smuzhiyun 				 u32 __user *completep)
1923*4882a593Smuzhiyun {
1924*4882a593Smuzhiyun 	u32 val;
1925*4882a593Smuzhiyun 	int err;
1926*4882a593Smuzhiyun 
1927*4882a593Smuzhiyun 	if (!pq)
1928*4882a593Smuzhiyun 		return -EINVAL;
1929*4882a593Smuzhiyun 
1930*4882a593Smuzhiyun 	err = qib_user_sdma_make_progress(ppd, pq);
1931*4882a593Smuzhiyun 	if (err < 0)
1932*4882a593Smuzhiyun 		return err;
1933*4882a593Smuzhiyun 
1934*4882a593Smuzhiyun 	val = qib_user_sdma_complete_counter(pq);
1935*4882a593Smuzhiyun 	if (put_user(val, completep))
1936*4882a593Smuzhiyun 		return -EFAULT;
1937*4882a593Smuzhiyun 
1938*4882a593Smuzhiyun 	return 0;
1939*4882a593Smuzhiyun }
1940*4882a593Smuzhiyun 
disarm_req_delay(struct qib_ctxtdata * rcd)1941*4882a593Smuzhiyun static int disarm_req_delay(struct qib_ctxtdata *rcd)
1942*4882a593Smuzhiyun {
1943*4882a593Smuzhiyun 	int ret = 0;
1944*4882a593Smuzhiyun 
1945*4882a593Smuzhiyun 	if (!usable(rcd->ppd)) {
1946*4882a593Smuzhiyun 		int i;
1947*4882a593Smuzhiyun 		/*
1948*4882a593Smuzhiyun 		 * if link is down, or otherwise not usable, delay
1949*4882a593Smuzhiyun 		 * the caller up to 30 seconds, so we don't thrash
1950*4882a593Smuzhiyun 		 * in trying to get the chip back to ACTIVE, and
1951*4882a593Smuzhiyun 		 * set flag so they make the call again.
1952*4882a593Smuzhiyun 		 */
1953*4882a593Smuzhiyun 		if (rcd->user_event_mask) {
1954*4882a593Smuzhiyun 			/*
1955*4882a593Smuzhiyun 			 * subctxt_cnt is 0 if not shared, so do base
1956*4882a593Smuzhiyun 			 * separately, first, then remaining subctxt, if any
1957*4882a593Smuzhiyun 			 */
1958*4882a593Smuzhiyun 			set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
1959*4882a593Smuzhiyun 				&rcd->user_event_mask[0]);
1960*4882a593Smuzhiyun 			for (i = 1; i < rcd->subctxt_cnt; i++)
1961*4882a593Smuzhiyun 				set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
1962*4882a593Smuzhiyun 					&rcd->user_event_mask[i]);
1963*4882a593Smuzhiyun 		}
1964*4882a593Smuzhiyun 		for (i = 0; !usable(rcd->ppd) && i < 300; i++)
1965*4882a593Smuzhiyun 			msleep(100);
1966*4882a593Smuzhiyun 		ret = -ENETDOWN;
1967*4882a593Smuzhiyun 	}
1968*4882a593Smuzhiyun 	return ret;
1969*4882a593Smuzhiyun }
1970*4882a593Smuzhiyun 
1971*4882a593Smuzhiyun /*
1972*4882a593Smuzhiyun  * Find all user contexts in use, and set the specified bit in their
1973*4882a593Smuzhiyun  * event mask.
1974*4882a593Smuzhiyun  * See also find_ctxt() for a similar use, that is specific to send buffers.
1975*4882a593Smuzhiyun  */
qib_set_uevent_bits(struct qib_pportdata * ppd,const int evtbit)1976*4882a593Smuzhiyun int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit)
1977*4882a593Smuzhiyun {
1978*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd;
1979*4882a593Smuzhiyun 	unsigned ctxt;
1980*4882a593Smuzhiyun 	int ret = 0;
1981*4882a593Smuzhiyun 	unsigned long flags;
1982*4882a593Smuzhiyun 
1983*4882a593Smuzhiyun 	spin_lock_irqsave(&ppd->dd->uctxt_lock, flags);
1984*4882a593Smuzhiyun 	for (ctxt = ppd->dd->first_user_ctxt; ctxt < ppd->dd->cfgctxts;
1985*4882a593Smuzhiyun 	     ctxt++) {
1986*4882a593Smuzhiyun 		rcd = ppd->dd->rcd[ctxt];
1987*4882a593Smuzhiyun 		if (!rcd)
1988*4882a593Smuzhiyun 			continue;
1989*4882a593Smuzhiyun 		if (rcd->user_event_mask) {
1990*4882a593Smuzhiyun 			int i;
1991*4882a593Smuzhiyun 			/*
1992*4882a593Smuzhiyun 			 * subctxt_cnt is 0 if not shared, so do base
1993*4882a593Smuzhiyun 			 * separately, first, then remaining subctxt, if any
1994*4882a593Smuzhiyun 			 */
1995*4882a593Smuzhiyun 			set_bit(evtbit, &rcd->user_event_mask[0]);
1996*4882a593Smuzhiyun 			for (i = 1; i < rcd->subctxt_cnt; i++)
1997*4882a593Smuzhiyun 				set_bit(evtbit, &rcd->user_event_mask[i]);
1998*4882a593Smuzhiyun 		}
1999*4882a593Smuzhiyun 		ret = 1;
2000*4882a593Smuzhiyun 		break;
2001*4882a593Smuzhiyun 	}
2002*4882a593Smuzhiyun 	spin_unlock_irqrestore(&ppd->dd->uctxt_lock, flags);
2003*4882a593Smuzhiyun 
2004*4882a593Smuzhiyun 	return ret;
2005*4882a593Smuzhiyun }
2006*4882a593Smuzhiyun 
2007*4882a593Smuzhiyun /*
2008*4882a593Smuzhiyun  * clear the event notifier events for this context.
2009*4882a593Smuzhiyun  * For the DISARM_BUFS case, we also take action (this obsoletes
2010*4882a593Smuzhiyun  * the older QIB_CMD_DISARM_BUFS, but we keep it for backwards
2011*4882a593Smuzhiyun  * compatibility.
2012*4882a593Smuzhiyun  * Other bits don't currently require actions, just atomically clear.
2013*4882a593Smuzhiyun  * User process then performs actions appropriate to bit having been
2014*4882a593Smuzhiyun  * set, if desired, and checks again in future.
2015*4882a593Smuzhiyun  */
qib_user_event_ack(struct qib_ctxtdata * rcd,int subctxt,unsigned long events)2016*4882a593Smuzhiyun static int qib_user_event_ack(struct qib_ctxtdata *rcd, int subctxt,
2017*4882a593Smuzhiyun 			      unsigned long events)
2018*4882a593Smuzhiyun {
2019*4882a593Smuzhiyun 	int ret = 0, i;
2020*4882a593Smuzhiyun 
2021*4882a593Smuzhiyun 	for (i = 0; i <= _QIB_MAX_EVENT_BIT; i++) {
2022*4882a593Smuzhiyun 		if (!test_bit(i, &events))
2023*4882a593Smuzhiyun 			continue;
2024*4882a593Smuzhiyun 		if (i == _QIB_EVENT_DISARM_BUFS_BIT) {
2025*4882a593Smuzhiyun 			(void)qib_disarm_piobufs_ifneeded(rcd);
2026*4882a593Smuzhiyun 			ret = disarm_req_delay(rcd);
2027*4882a593Smuzhiyun 		} else
2028*4882a593Smuzhiyun 			clear_bit(i, &rcd->user_event_mask[subctxt]);
2029*4882a593Smuzhiyun 	}
2030*4882a593Smuzhiyun 	return ret;
2031*4882a593Smuzhiyun }
2032*4882a593Smuzhiyun 
qib_write(struct file * fp,const char __user * data,size_t count,loff_t * off)2033*4882a593Smuzhiyun static ssize_t qib_write(struct file *fp, const char __user *data,
2034*4882a593Smuzhiyun 			 size_t count, loff_t *off)
2035*4882a593Smuzhiyun {
2036*4882a593Smuzhiyun 	const struct qib_cmd __user *ucmd;
2037*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd;
2038*4882a593Smuzhiyun 	const void __user *src;
2039*4882a593Smuzhiyun 	size_t consumed, copy = 0;
2040*4882a593Smuzhiyun 	struct qib_cmd cmd;
2041*4882a593Smuzhiyun 	ssize_t ret = 0;
2042*4882a593Smuzhiyun 	void *dest;
2043*4882a593Smuzhiyun 
2044*4882a593Smuzhiyun 	if (!ib_safe_file_access(fp)) {
2045*4882a593Smuzhiyun 		pr_err_once("qib_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
2046*4882a593Smuzhiyun 			    task_tgid_vnr(current), current->comm);
2047*4882a593Smuzhiyun 		return -EACCES;
2048*4882a593Smuzhiyun 	}
2049*4882a593Smuzhiyun 
2050*4882a593Smuzhiyun 	if (count < sizeof(cmd.type)) {
2051*4882a593Smuzhiyun 		ret = -EINVAL;
2052*4882a593Smuzhiyun 		goto bail;
2053*4882a593Smuzhiyun 	}
2054*4882a593Smuzhiyun 
2055*4882a593Smuzhiyun 	ucmd = (const struct qib_cmd __user *) data;
2056*4882a593Smuzhiyun 
2057*4882a593Smuzhiyun 	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
2058*4882a593Smuzhiyun 		ret = -EFAULT;
2059*4882a593Smuzhiyun 		goto bail;
2060*4882a593Smuzhiyun 	}
2061*4882a593Smuzhiyun 
2062*4882a593Smuzhiyun 	consumed = sizeof(cmd.type);
2063*4882a593Smuzhiyun 
2064*4882a593Smuzhiyun 	switch (cmd.type) {
2065*4882a593Smuzhiyun 	case QIB_CMD_ASSIGN_CTXT:
2066*4882a593Smuzhiyun 	case QIB_CMD_USER_INIT:
2067*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.user_info);
2068*4882a593Smuzhiyun 		dest = &cmd.cmd.user_info;
2069*4882a593Smuzhiyun 		src = &ucmd->cmd.user_info;
2070*4882a593Smuzhiyun 		break;
2071*4882a593Smuzhiyun 
2072*4882a593Smuzhiyun 	case QIB_CMD_RECV_CTRL:
2073*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.recv_ctrl);
2074*4882a593Smuzhiyun 		dest = &cmd.cmd.recv_ctrl;
2075*4882a593Smuzhiyun 		src = &ucmd->cmd.recv_ctrl;
2076*4882a593Smuzhiyun 		break;
2077*4882a593Smuzhiyun 
2078*4882a593Smuzhiyun 	case QIB_CMD_CTXT_INFO:
2079*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.ctxt_info);
2080*4882a593Smuzhiyun 		dest = &cmd.cmd.ctxt_info;
2081*4882a593Smuzhiyun 		src = &ucmd->cmd.ctxt_info;
2082*4882a593Smuzhiyun 		break;
2083*4882a593Smuzhiyun 
2084*4882a593Smuzhiyun 	case QIB_CMD_TID_UPDATE:
2085*4882a593Smuzhiyun 	case QIB_CMD_TID_FREE:
2086*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.tid_info);
2087*4882a593Smuzhiyun 		dest = &cmd.cmd.tid_info;
2088*4882a593Smuzhiyun 		src = &ucmd->cmd.tid_info;
2089*4882a593Smuzhiyun 		break;
2090*4882a593Smuzhiyun 
2091*4882a593Smuzhiyun 	case QIB_CMD_SET_PART_KEY:
2092*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.part_key);
2093*4882a593Smuzhiyun 		dest = &cmd.cmd.part_key;
2094*4882a593Smuzhiyun 		src = &ucmd->cmd.part_key;
2095*4882a593Smuzhiyun 		break;
2096*4882a593Smuzhiyun 
2097*4882a593Smuzhiyun 	case QIB_CMD_DISARM_BUFS:
2098*4882a593Smuzhiyun 	case QIB_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */
2099*4882a593Smuzhiyun 		copy = 0;
2100*4882a593Smuzhiyun 		src = NULL;
2101*4882a593Smuzhiyun 		dest = NULL;
2102*4882a593Smuzhiyun 		break;
2103*4882a593Smuzhiyun 
2104*4882a593Smuzhiyun 	case QIB_CMD_POLL_TYPE:
2105*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.poll_type);
2106*4882a593Smuzhiyun 		dest = &cmd.cmd.poll_type;
2107*4882a593Smuzhiyun 		src = &ucmd->cmd.poll_type;
2108*4882a593Smuzhiyun 		break;
2109*4882a593Smuzhiyun 
2110*4882a593Smuzhiyun 	case QIB_CMD_ARMLAUNCH_CTRL:
2111*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.armlaunch_ctrl);
2112*4882a593Smuzhiyun 		dest = &cmd.cmd.armlaunch_ctrl;
2113*4882a593Smuzhiyun 		src = &ucmd->cmd.armlaunch_ctrl;
2114*4882a593Smuzhiyun 		break;
2115*4882a593Smuzhiyun 
2116*4882a593Smuzhiyun 	case QIB_CMD_SDMA_INFLIGHT:
2117*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.sdma_inflight);
2118*4882a593Smuzhiyun 		dest = &cmd.cmd.sdma_inflight;
2119*4882a593Smuzhiyun 		src = &ucmd->cmd.sdma_inflight;
2120*4882a593Smuzhiyun 		break;
2121*4882a593Smuzhiyun 
2122*4882a593Smuzhiyun 	case QIB_CMD_SDMA_COMPLETE:
2123*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.sdma_complete);
2124*4882a593Smuzhiyun 		dest = &cmd.cmd.sdma_complete;
2125*4882a593Smuzhiyun 		src = &ucmd->cmd.sdma_complete;
2126*4882a593Smuzhiyun 		break;
2127*4882a593Smuzhiyun 
2128*4882a593Smuzhiyun 	case QIB_CMD_ACK_EVENT:
2129*4882a593Smuzhiyun 		copy = sizeof(cmd.cmd.event_mask);
2130*4882a593Smuzhiyun 		dest = &cmd.cmd.event_mask;
2131*4882a593Smuzhiyun 		src = &ucmd->cmd.event_mask;
2132*4882a593Smuzhiyun 		break;
2133*4882a593Smuzhiyun 
2134*4882a593Smuzhiyun 	default:
2135*4882a593Smuzhiyun 		ret = -EINVAL;
2136*4882a593Smuzhiyun 		goto bail;
2137*4882a593Smuzhiyun 	}
2138*4882a593Smuzhiyun 
2139*4882a593Smuzhiyun 	if (copy) {
2140*4882a593Smuzhiyun 		if ((count - consumed) < copy) {
2141*4882a593Smuzhiyun 			ret = -EINVAL;
2142*4882a593Smuzhiyun 			goto bail;
2143*4882a593Smuzhiyun 		}
2144*4882a593Smuzhiyun 		if (copy_from_user(dest, src, copy)) {
2145*4882a593Smuzhiyun 			ret = -EFAULT;
2146*4882a593Smuzhiyun 			goto bail;
2147*4882a593Smuzhiyun 		}
2148*4882a593Smuzhiyun 		consumed += copy;
2149*4882a593Smuzhiyun 	}
2150*4882a593Smuzhiyun 
2151*4882a593Smuzhiyun 	rcd = ctxt_fp(fp);
2152*4882a593Smuzhiyun 	if (!rcd && cmd.type != QIB_CMD_ASSIGN_CTXT) {
2153*4882a593Smuzhiyun 		ret = -EINVAL;
2154*4882a593Smuzhiyun 		goto bail;
2155*4882a593Smuzhiyun 	}
2156*4882a593Smuzhiyun 
2157*4882a593Smuzhiyun 	switch (cmd.type) {
2158*4882a593Smuzhiyun 	case QIB_CMD_ASSIGN_CTXT:
2159*4882a593Smuzhiyun 		if (rcd) {
2160*4882a593Smuzhiyun 			ret = -EINVAL;
2161*4882a593Smuzhiyun 			goto bail;
2162*4882a593Smuzhiyun 		}
2163*4882a593Smuzhiyun 
2164*4882a593Smuzhiyun 		ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
2165*4882a593Smuzhiyun 		if (ret)
2166*4882a593Smuzhiyun 			goto bail;
2167*4882a593Smuzhiyun 		break;
2168*4882a593Smuzhiyun 
2169*4882a593Smuzhiyun 	case QIB_CMD_USER_INIT:
2170*4882a593Smuzhiyun 		ret = qib_do_user_init(fp, &cmd.cmd.user_info);
2171*4882a593Smuzhiyun 		if (ret)
2172*4882a593Smuzhiyun 			goto bail;
2173*4882a593Smuzhiyun 		ret = qib_get_base_info(fp, u64_to_user_ptr(
2174*4882a593Smuzhiyun 					  cmd.cmd.user_info.spu_base_info),
2175*4882a593Smuzhiyun 					cmd.cmd.user_info.spu_base_info_size);
2176*4882a593Smuzhiyun 		break;
2177*4882a593Smuzhiyun 
2178*4882a593Smuzhiyun 	case QIB_CMD_RECV_CTRL:
2179*4882a593Smuzhiyun 		ret = qib_manage_rcvq(rcd, subctxt_fp(fp), cmd.cmd.recv_ctrl);
2180*4882a593Smuzhiyun 		break;
2181*4882a593Smuzhiyun 
2182*4882a593Smuzhiyun 	case QIB_CMD_CTXT_INFO:
2183*4882a593Smuzhiyun 		ret = qib_ctxt_info(fp, (struct qib_ctxt_info __user *)
2184*4882a593Smuzhiyun 				    (unsigned long) cmd.cmd.ctxt_info);
2185*4882a593Smuzhiyun 		break;
2186*4882a593Smuzhiyun 
2187*4882a593Smuzhiyun 	case QIB_CMD_TID_UPDATE:
2188*4882a593Smuzhiyun 		ret = qib_tid_update(rcd, fp, &cmd.cmd.tid_info);
2189*4882a593Smuzhiyun 		break;
2190*4882a593Smuzhiyun 
2191*4882a593Smuzhiyun 	case QIB_CMD_TID_FREE:
2192*4882a593Smuzhiyun 		ret = qib_tid_free(rcd, subctxt_fp(fp), &cmd.cmd.tid_info);
2193*4882a593Smuzhiyun 		break;
2194*4882a593Smuzhiyun 
2195*4882a593Smuzhiyun 	case QIB_CMD_SET_PART_KEY:
2196*4882a593Smuzhiyun 		ret = qib_set_part_key(rcd, cmd.cmd.part_key);
2197*4882a593Smuzhiyun 		break;
2198*4882a593Smuzhiyun 
2199*4882a593Smuzhiyun 	case QIB_CMD_DISARM_BUFS:
2200*4882a593Smuzhiyun 		(void)qib_disarm_piobufs_ifneeded(rcd);
2201*4882a593Smuzhiyun 		ret = disarm_req_delay(rcd);
2202*4882a593Smuzhiyun 		break;
2203*4882a593Smuzhiyun 
2204*4882a593Smuzhiyun 	case QIB_CMD_PIOAVAILUPD:
2205*4882a593Smuzhiyun 		qib_force_pio_avail_update(rcd->dd);
2206*4882a593Smuzhiyun 		break;
2207*4882a593Smuzhiyun 
2208*4882a593Smuzhiyun 	case QIB_CMD_POLL_TYPE:
2209*4882a593Smuzhiyun 		rcd->poll_type = cmd.cmd.poll_type;
2210*4882a593Smuzhiyun 		break;
2211*4882a593Smuzhiyun 
2212*4882a593Smuzhiyun 	case QIB_CMD_ARMLAUNCH_CTRL:
2213*4882a593Smuzhiyun 		rcd->dd->f_set_armlaunch(rcd->dd, cmd.cmd.armlaunch_ctrl);
2214*4882a593Smuzhiyun 		break;
2215*4882a593Smuzhiyun 
2216*4882a593Smuzhiyun 	case QIB_CMD_SDMA_INFLIGHT:
2217*4882a593Smuzhiyun 		ret = qib_sdma_get_inflight(user_sdma_queue_fp(fp),
2218*4882a593Smuzhiyun 					    (u32 __user *) (unsigned long)
2219*4882a593Smuzhiyun 					    cmd.cmd.sdma_inflight);
2220*4882a593Smuzhiyun 		break;
2221*4882a593Smuzhiyun 
2222*4882a593Smuzhiyun 	case QIB_CMD_SDMA_COMPLETE:
2223*4882a593Smuzhiyun 		ret = qib_sdma_get_complete(rcd->ppd,
2224*4882a593Smuzhiyun 					    user_sdma_queue_fp(fp),
2225*4882a593Smuzhiyun 					    (u32 __user *) (unsigned long)
2226*4882a593Smuzhiyun 					    cmd.cmd.sdma_complete);
2227*4882a593Smuzhiyun 		break;
2228*4882a593Smuzhiyun 
2229*4882a593Smuzhiyun 	case QIB_CMD_ACK_EVENT:
2230*4882a593Smuzhiyun 		ret = qib_user_event_ack(rcd, subctxt_fp(fp),
2231*4882a593Smuzhiyun 					 cmd.cmd.event_mask);
2232*4882a593Smuzhiyun 		break;
2233*4882a593Smuzhiyun 	}
2234*4882a593Smuzhiyun 
2235*4882a593Smuzhiyun 	if (ret >= 0)
2236*4882a593Smuzhiyun 		ret = consumed;
2237*4882a593Smuzhiyun 
2238*4882a593Smuzhiyun bail:
2239*4882a593Smuzhiyun 	return ret;
2240*4882a593Smuzhiyun }
2241*4882a593Smuzhiyun 
qib_write_iter(struct kiocb * iocb,struct iov_iter * from)2242*4882a593Smuzhiyun static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from)
2243*4882a593Smuzhiyun {
2244*4882a593Smuzhiyun 	struct qib_filedata *fp = iocb->ki_filp->private_data;
2245*4882a593Smuzhiyun 	struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp);
2246*4882a593Smuzhiyun 	struct qib_user_sdma_queue *pq = fp->pq;
2247*4882a593Smuzhiyun 
2248*4882a593Smuzhiyun 	if (!iter_is_iovec(from) || !from->nr_segs || !pq)
2249*4882a593Smuzhiyun 		return -EINVAL;
2250*4882a593Smuzhiyun 
2251*4882a593Smuzhiyun 	return qib_user_sdma_writev(rcd, pq, from->iov, from->nr_segs);
2252*4882a593Smuzhiyun }
2253*4882a593Smuzhiyun 
2254*4882a593Smuzhiyun static struct class *qib_class;
2255*4882a593Smuzhiyun static dev_t qib_dev;
2256*4882a593Smuzhiyun 
qib_cdev_init(int minor,const char * name,const struct file_operations * fops,struct cdev ** cdevp,struct device ** devp)2257*4882a593Smuzhiyun int qib_cdev_init(int minor, const char *name,
2258*4882a593Smuzhiyun 		  const struct file_operations *fops,
2259*4882a593Smuzhiyun 		  struct cdev **cdevp, struct device **devp)
2260*4882a593Smuzhiyun {
2261*4882a593Smuzhiyun 	const dev_t dev = MKDEV(MAJOR(qib_dev), minor);
2262*4882a593Smuzhiyun 	struct cdev *cdev;
2263*4882a593Smuzhiyun 	struct device *device = NULL;
2264*4882a593Smuzhiyun 	int ret;
2265*4882a593Smuzhiyun 
2266*4882a593Smuzhiyun 	cdev = cdev_alloc();
2267*4882a593Smuzhiyun 	if (!cdev) {
2268*4882a593Smuzhiyun 		pr_err("Could not allocate cdev for minor %d, %s\n",
2269*4882a593Smuzhiyun 		       minor, name);
2270*4882a593Smuzhiyun 		ret = -ENOMEM;
2271*4882a593Smuzhiyun 		goto done;
2272*4882a593Smuzhiyun 	}
2273*4882a593Smuzhiyun 
2274*4882a593Smuzhiyun 	cdev->owner = THIS_MODULE;
2275*4882a593Smuzhiyun 	cdev->ops = fops;
2276*4882a593Smuzhiyun 	kobject_set_name(&cdev->kobj, name);
2277*4882a593Smuzhiyun 
2278*4882a593Smuzhiyun 	ret = cdev_add(cdev, dev, 1);
2279*4882a593Smuzhiyun 	if (ret < 0) {
2280*4882a593Smuzhiyun 		pr_err("Could not add cdev for minor %d, %s (err %d)\n",
2281*4882a593Smuzhiyun 		       minor, name, -ret);
2282*4882a593Smuzhiyun 		goto err_cdev;
2283*4882a593Smuzhiyun 	}
2284*4882a593Smuzhiyun 
2285*4882a593Smuzhiyun 	device = device_create(qib_class, NULL, dev, NULL, "%s", name);
2286*4882a593Smuzhiyun 	if (!IS_ERR(device))
2287*4882a593Smuzhiyun 		goto done;
2288*4882a593Smuzhiyun 	ret = PTR_ERR(device);
2289*4882a593Smuzhiyun 	device = NULL;
2290*4882a593Smuzhiyun 	pr_err("Could not create device for minor %d, %s (err %d)\n",
2291*4882a593Smuzhiyun 	       minor, name, -ret);
2292*4882a593Smuzhiyun err_cdev:
2293*4882a593Smuzhiyun 	cdev_del(cdev);
2294*4882a593Smuzhiyun 	cdev = NULL;
2295*4882a593Smuzhiyun done:
2296*4882a593Smuzhiyun 	*cdevp = cdev;
2297*4882a593Smuzhiyun 	*devp = device;
2298*4882a593Smuzhiyun 	return ret;
2299*4882a593Smuzhiyun }
2300*4882a593Smuzhiyun 
qib_cdev_cleanup(struct cdev ** cdevp,struct device ** devp)2301*4882a593Smuzhiyun void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp)
2302*4882a593Smuzhiyun {
2303*4882a593Smuzhiyun 	struct device *device = *devp;
2304*4882a593Smuzhiyun 
2305*4882a593Smuzhiyun 	if (device) {
2306*4882a593Smuzhiyun 		device_unregister(device);
2307*4882a593Smuzhiyun 		*devp = NULL;
2308*4882a593Smuzhiyun 	}
2309*4882a593Smuzhiyun 
2310*4882a593Smuzhiyun 	if (*cdevp) {
2311*4882a593Smuzhiyun 		cdev_del(*cdevp);
2312*4882a593Smuzhiyun 		*cdevp = NULL;
2313*4882a593Smuzhiyun 	}
2314*4882a593Smuzhiyun }
2315*4882a593Smuzhiyun 
2316*4882a593Smuzhiyun static struct cdev *wildcard_cdev;
2317*4882a593Smuzhiyun static struct device *wildcard_device;
2318*4882a593Smuzhiyun 
qib_dev_init(void)2319*4882a593Smuzhiyun int __init qib_dev_init(void)
2320*4882a593Smuzhiyun {
2321*4882a593Smuzhiyun 	int ret;
2322*4882a593Smuzhiyun 
2323*4882a593Smuzhiyun 	ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME);
2324*4882a593Smuzhiyun 	if (ret < 0) {
2325*4882a593Smuzhiyun 		pr_err("Could not allocate chrdev region (err %d)\n", -ret);
2326*4882a593Smuzhiyun 		goto done;
2327*4882a593Smuzhiyun 	}
2328*4882a593Smuzhiyun 
2329*4882a593Smuzhiyun 	qib_class = class_create(THIS_MODULE, "ipath");
2330*4882a593Smuzhiyun 	if (IS_ERR(qib_class)) {
2331*4882a593Smuzhiyun 		ret = PTR_ERR(qib_class);
2332*4882a593Smuzhiyun 		pr_err("Could not create device class (err %d)\n", -ret);
2333*4882a593Smuzhiyun 		unregister_chrdev_region(qib_dev, QIB_NMINORS);
2334*4882a593Smuzhiyun 	}
2335*4882a593Smuzhiyun 
2336*4882a593Smuzhiyun done:
2337*4882a593Smuzhiyun 	return ret;
2338*4882a593Smuzhiyun }
2339*4882a593Smuzhiyun 
qib_dev_cleanup(void)2340*4882a593Smuzhiyun void qib_dev_cleanup(void)
2341*4882a593Smuzhiyun {
2342*4882a593Smuzhiyun 	if (qib_class) {
2343*4882a593Smuzhiyun 		class_destroy(qib_class);
2344*4882a593Smuzhiyun 		qib_class = NULL;
2345*4882a593Smuzhiyun 	}
2346*4882a593Smuzhiyun 
2347*4882a593Smuzhiyun 	unregister_chrdev_region(qib_dev, QIB_NMINORS);
2348*4882a593Smuzhiyun }
2349*4882a593Smuzhiyun 
2350*4882a593Smuzhiyun static atomic_t user_count = ATOMIC_INIT(0);
2351*4882a593Smuzhiyun 
qib_user_remove(struct qib_devdata * dd)2352*4882a593Smuzhiyun static void qib_user_remove(struct qib_devdata *dd)
2353*4882a593Smuzhiyun {
2354*4882a593Smuzhiyun 	if (atomic_dec_return(&user_count) == 0)
2355*4882a593Smuzhiyun 		qib_cdev_cleanup(&wildcard_cdev, &wildcard_device);
2356*4882a593Smuzhiyun 
2357*4882a593Smuzhiyun 	qib_cdev_cleanup(&dd->user_cdev, &dd->user_device);
2358*4882a593Smuzhiyun }
2359*4882a593Smuzhiyun 
qib_user_add(struct qib_devdata * dd)2360*4882a593Smuzhiyun static int qib_user_add(struct qib_devdata *dd)
2361*4882a593Smuzhiyun {
2362*4882a593Smuzhiyun 	char name[10];
2363*4882a593Smuzhiyun 	int ret;
2364*4882a593Smuzhiyun 
2365*4882a593Smuzhiyun 	if (atomic_inc_return(&user_count) == 1) {
2366*4882a593Smuzhiyun 		ret = qib_cdev_init(0, "ipath", &qib_file_ops,
2367*4882a593Smuzhiyun 				    &wildcard_cdev, &wildcard_device);
2368*4882a593Smuzhiyun 		if (ret)
2369*4882a593Smuzhiyun 			goto done;
2370*4882a593Smuzhiyun 	}
2371*4882a593Smuzhiyun 
2372*4882a593Smuzhiyun 	snprintf(name, sizeof(name), "ipath%d", dd->unit);
2373*4882a593Smuzhiyun 	ret = qib_cdev_init(dd->unit + 1, name, &qib_file_ops,
2374*4882a593Smuzhiyun 			    &dd->user_cdev, &dd->user_device);
2375*4882a593Smuzhiyun 	if (ret)
2376*4882a593Smuzhiyun 		qib_user_remove(dd);
2377*4882a593Smuzhiyun done:
2378*4882a593Smuzhiyun 	return ret;
2379*4882a593Smuzhiyun }
2380*4882a593Smuzhiyun 
2381*4882a593Smuzhiyun /*
2382*4882a593Smuzhiyun  * Create per-unit files in /dev
2383*4882a593Smuzhiyun  */
qib_device_create(struct qib_devdata * dd)2384*4882a593Smuzhiyun int qib_device_create(struct qib_devdata *dd)
2385*4882a593Smuzhiyun {
2386*4882a593Smuzhiyun 	int r, ret;
2387*4882a593Smuzhiyun 
2388*4882a593Smuzhiyun 	r = qib_user_add(dd);
2389*4882a593Smuzhiyun 	ret = qib_diag_add(dd);
2390*4882a593Smuzhiyun 	if (r && !ret)
2391*4882a593Smuzhiyun 		ret = r;
2392*4882a593Smuzhiyun 	return ret;
2393*4882a593Smuzhiyun }
2394*4882a593Smuzhiyun 
2395*4882a593Smuzhiyun /*
2396*4882a593Smuzhiyun  * Remove per-unit files in /dev
2397*4882a593Smuzhiyun  * void, core kernel returns no errors for this stuff
2398*4882a593Smuzhiyun  */
qib_device_remove(struct qib_devdata * dd)2399*4882a593Smuzhiyun void qib_device_remove(struct qib_devdata *dd)
2400*4882a593Smuzhiyun {
2401*4882a593Smuzhiyun 	qib_user_remove(dd);
2402*4882a593Smuzhiyun 	qib_diag_remove(dd);
2403*4882a593Smuzhiyun }
2404