xref: /OK3568_Linux_fs/kernel/drivers/infiniband/sw/siw/siw_mem.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2*4882a593Smuzhiyun 
3*4882a593Smuzhiyun /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4*4882a593Smuzhiyun /* Copyright (c) 2008-2019, IBM Corporation */
5*4882a593Smuzhiyun 
6*4882a593Smuzhiyun #include <linux/gfp.h>
7*4882a593Smuzhiyun #include <rdma/ib_verbs.h>
8*4882a593Smuzhiyun #include <linux/dma-mapping.h>
9*4882a593Smuzhiyun #include <linux/slab.h>
10*4882a593Smuzhiyun #include <linux/sched/mm.h>
11*4882a593Smuzhiyun #include <linux/resource.h>
12*4882a593Smuzhiyun 
13*4882a593Smuzhiyun #include "siw.h"
14*4882a593Smuzhiyun #include "siw_mem.h"
15*4882a593Smuzhiyun 
16*4882a593Smuzhiyun /*
17*4882a593Smuzhiyun  * Stag lookup is based on its index part only (24 bits).
18*4882a593Smuzhiyun  * The code avoids special Stag of zero and tries to randomize
19*4882a593Smuzhiyun  * STag values between 1 and SIW_STAG_MAX_INDEX.
20*4882a593Smuzhiyun  */
siw_mem_add(struct siw_device * sdev,struct siw_mem * m)21*4882a593Smuzhiyun int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
22*4882a593Smuzhiyun {
23*4882a593Smuzhiyun 	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
24*4882a593Smuzhiyun 	u32 id, next;
25*4882a593Smuzhiyun 
26*4882a593Smuzhiyun 	get_random_bytes(&next, 4);
27*4882a593Smuzhiyun 	next &= 0x00ffffff;
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun 	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
30*4882a593Smuzhiyun 	    GFP_KERNEL) < 0)
31*4882a593Smuzhiyun 		return -ENOMEM;
32*4882a593Smuzhiyun 
33*4882a593Smuzhiyun 	/* Set the STag index part */
34*4882a593Smuzhiyun 	m->stag = id << 8;
35*4882a593Smuzhiyun 
36*4882a593Smuzhiyun 	siw_dbg_mem(m, "new MEM object\n");
37*4882a593Smuzhiyun 
38*4882a593Smuzhiyun 	return 0;
39*4882a593Smuzhiyun }
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun /*
42*4882a593Smuzhiyun  * siw_mem_id2obj()
43*4882a593Smuzhiyun  *
44*4882a593Smuzhiyun  * resolves memory from stag given by id. might be called from:
45*4882a593Smuzhiyun  * o process context before sending out of sgl, or
46*4882a593Smuzhiyun  * o in softirq when resolving target memory
47*4882a593Smuzhiyun  */
siw_mem_id2obj(struct siw_device * sdev,int stag_index)48*4882a593Smuzhiyun struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
49*4882a593Smuzhiyun {
50*4882a593Smuzhiyun 	struct siw_mem *mem;
51*4882a593Smuzhiyun 
52*4882a593Smuzhiyun 	rcu_read_lock();
53*4882a593Smuzhiyun 	mem = xa_load(&sdev->mem_xa, stag_index);
54*4882a593Smuzhiyun 	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
55*4882a593Smuzhiyun 		rcu_read_unlock();
56*4882a593Smuzhiyun 		return mem;
57*4882a593Smuzhiyun 	}
58*4882a593Smuzhiyun 	rcu_read_unlock();
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun 	return NULL;
61*4882a593Smuzhiyun }
62*4882a593Smuzhiyun 
/* Unpin one chunk's pages, optionally marking them dirty first. */
static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
			   bool dirty)
{
	struct page **plist = chunk->plist;

	unpin_user_pages_dirty_lock(plist, num_pages, dirty);
}
68*4882a593Smuzhiyun 
/*
 * siw_umem_release()
 *
 * Unpin all pages of a user memory object chunk by chunk, undo the
 * pinned_vm accounting, drop the mm reference taken at pin time and
 * free all bookkeeping. Pages are dirtied only for writable memory
 * and if the caller requests it.
 */
void siw_umem_release(struct siw_umem *umem, bool dirty)
{
	struct mm_struct *mm_s = umem->owning_mm;
	int remaining = umem->num_pages;
	int i = 0;

	while (remaining) {
		int to_free = min_t(int, PAGES_PER_CHUNK, remaining);

		siw_free_plist(&umem->page_chunk[i], to_free,
			       umem->writable && dirty);
		kfree(umem->page_chunk[i].plist);
		remaining -= to_free;
		i++;
	}
	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);

	mmdrop(mm_s);
	kfree(umem->page_chunk);
	kfree(umem);
}
88*4882a593Smuzhiyun 
/*
 * siw_mr_add_mem()
 *
 * Create and initialize a new memory object for a memory region,
 * insert it into the device xarray and set the MR's lkey/rkey from
 * the resulting STag. The STag starts out invalid; the consumer
 * enables it separately.
 */
int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
		   u64 start, u64 len, int rights)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	struct siw_mem *mem;
	u32 id, next;
	int rv;

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	mem->mem_obj = mem_obj;
	mem->stag_valid = 0;	/* not usable until explicitly validated */
	mem->sdev = sdev;
	mem->va = start;
	mem->len = len;
	mem->pd = pd;
	mem->perms = rights & IWARP_ACCESS_MASK;
	kref_init(&mem->ref);

	/* Randomize the cyclic search start within the 24-bit index space. */
	get_random_bytes(&next, 4);
	next &= 0x00ffffff;

	rv = xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
			     GFP_KERNEL);
	if (rv < 0) {
		kfree(mem);
		return -ENOMEM;
	}

	mr->mem = mem;
	/* Upper 24 STag bits carry the index, lower 8 are the key part. */
	mem->stag = id << 8;
	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

	return 0;
}
125*4882a593Smuzhiyun 
/*
 * siw_mr_drop_mem()
 *
 * Detach a memory region's memory object: invalidate its STag,
 * remove it from the device xarray and drop the xarray's reference.
 * Concurrent lookups via siw_mem_id2obj() may still hold their own
 * references; final freeing happens via siw_free_mem() on last put.
 */
void siw_mr_drop_mem(struct siw_mr *mr)
{
	struct siw_mem *mem = mr->mem, *found;

	mem->stag_valid = 0;

	/* make STag invalid visible asap */
	smp_mb();

	/* Erase must return this very object; anything else is a bug. */
	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
	WARN_ON(found != mem);
	siw_mem_put(mem);
}
139*4882a593Smuzhiyun 
siw_free_mem(struct kref * ref)140*4882a593Smuzhiyun void siw_free_mem(struct kref *ref)
141*4882a593Smuzhiyun {
142*4882a593Smuzhiyun 	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun 	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	if (!mem->is_mw && mem->mem_obj) {
147*4882a593Smuzhiyun 		if (mem->is_pbl == 0)
148*4882a593Smuzhiyun 			siw_umem_release(mem->umem, true);
149*4882a593Smuzhiyun 		else
150*4882a593Smuzhiyun 			kfree(mem->pbl);
151*4882a593Smuzhiyun 	}
152*4882a593Smuzhiyun 	kfree(mem);
153*4882a593Smuzhiyun }
154*4882a593Smuzhiyun 
155*4882a593Smuzhiyun /*
156*4882a593Smuzhiyun  * siw_check_mem()
157*4882a593Smuzhiyun  *
158*4882a593Smuzhiyun  * Check protection domain, STAG state, access permissions and
159*4882a593Smuzhiyun  * address range for memory object.
160*4882a593Smuzhiyun  *
161*4882a593Smuzhiyun  * @pd:		Protection Domain memory should belong to
162*4882a593Smuzhiyun  * @mem:	memory to be checked
163*4882a593Smuzhiyun  * @addr:	starting addr of mem
164*4882a593Smuzhiyun  * @perms:	requested access permissions
165*4882a593Smuzhiyun  * @len:	len of memory interval to be checked
166*4882a593Smuzhiyun  *
167*4882a593Smuzhiyun  */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
		  enum ib_access_flags perms, int len)
{
	/* STag must be in valid state. */
	if (!mem->stag_valid) {
		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
		return -E_STAG_INVALID;
	}
	/* Memory must belong to the given protection domain. */
	if (mem->pd != pd) {
		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
		return -E_PD_MISMATCH;
	}
	/* All requested permission bits must be granted by mem->perms. */
	if ((mem->perms & perms) != perms) {
		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
			   mem->perms, perms);
		return -E_ACCESS_PERM;
	}
	/* [addr, addr + len) must lie within [va, va + mem->len). */
	if (addr < mem->va || addr + len > mem->va + mem->len) {
		siw_dbg_pd(pd, "MEM interval len %d\n", len);
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
			   (void *)(uintptr_t)addr,
			   (void *)(uintptr_t)(addr + len));
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
			   (void *)(uintptr_t)mem->va,
			   (void *)(uintptr_t)(mem->va + mem->len),
			   mem->stag);

		return -E_BASE_BOUNDS;
	}
	return E_ACCESS_OK;
}
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun /*
206*4882a593Smuzhiyun  * siw_check_sge()
207*4882a593Smuzhiyun  *
208*4882a593Smuzhiyun  * Check SGE for access rights in given interval
209*4882a593Smuzhiyun  *
210*4882a593Smuzhiyun  * @pd:		Protection Domain memory should belong to
211*4882a593Smuzhiyun  * @sge:	SGE to be checked
212*4882a593Smuzhiyun  * @mem:	location of memory reference within array
213*4882a593Smuzhiyun  * @perms:	requested access permissions
214*4882a593Smuzhiyun  * @off:	starting offset in SGE
215*4882a593Smuzhiyun  * @len:	len of memory interval to be checked
216*4882a593Smuzhiyun  *
217*4882a593Smuzhiyun  * NOTE: Function references SGE's memory object (mem->obj)
218*4882a593Smuzhiyun  * if not yet done. New reference is kept if check went ok and
219*4882a593Smuzhiyun  * released if check failed. If mem->obj is already valid, no new
220*4882a593Smuzhiyun  * lookup is being done and mem is not released if check fails.
221*4882a593Smuzhiyun  */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
		  enum ib_access_flags perms, u32 off, int len)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *new = NULL;
	int rv;

	/* Requested interval must fit into the SGE. */
	if (len + off > sge->length) {
		rv = -E_BASE_BOUNDS;
		goto fail;
	}
	/* Resolve memory object from STag index, if not cached yet. */
	if (!*mem) {
		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
		if (unlikely(!new)) {
			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
			rv = -E_STAG_INVALID;
			goto fail;
		}
		*mem = new;
	}
	/* Check if user re-registered with different STag key */
	if (unlikely((*mem)->stag != sge->lkey)) {
		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
		rv = -E_STAG_INVALID;
		goto fail;
	}
	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
	if (unlikely(rv))
		goto fail;

	return 0;

fail:
	/* Drop only a reference taken by this call, never a cached one. */
	if (new) {
		*mem = NULL;
		siw_mem_put(new);
	}
	return rv;
}
261*4882a593Smuzhiyun 
/*
 * siw_wqe_put_mem()
 *
 * Drop the memory references a work queue element may hold,
 * depending on its operation type. Inline sends/writes carry the
 * data within the WQE and reference no memory objects.
 */
void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
	switch (op) {
	case SIW_OP_SEND:
	case SIW_OP_WRITE:
	case SIW_OP_SEND_WITH_IMM:
	case SIW_OP_SEND_REMOTE_INV:
	case SIW_OP_READ:
	case SIW_OP_READ_LOCAL_INV:
		/* SQ operation: references per send-side SGE, unless inline. */
		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
		break;

	case SIW_OP_RECEIVE:
		/* RQ operation: references per receive-side SGE. */
		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
		break;

	case SIW_OP_READ_RESPONSE:
		/* Read response targets exactly one memory object. */
		siw_unref_mem_sgl(wqe->mem, 1);
		break;

	default:
		/*
		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
		 * do not hold memory references
		 */
		break;
	}
}
291*4882a593Smuzhiyun 
/*
 * siw_invalidate_stag()
 *
 * Invalidate the memory object referenced by the given STag, after
 * verifying the STag is known and belongs to the given protection
 * domain. Returns 0, -EINVAL for an unknown STag or -EACCES on
 * PD mismatch.
 */
int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem;
	int rv = 0;

	mem = siw_mem_id2obj(sdev, stag >> 8);
	if (unlikely(!mem)) {
		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
		return -EINVAL;
	}
	if (unlikely(mem->pd != pd)) {
		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
		rv = -EACCES;
		goto out;
	}
	/*
	 * Per RDMA verbs definition, an STag may already be in invalid
	 * state if invalidation is requested. So no state check here.
	 */
	mem->stag_valid = 0;

	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
	siw_mem_put(mem);
	return rv;
}
318*4882a593Smuzhiyun 
319*4882a593Smuzhiyun /*
320*4882a593Smuzhiyun  * Gets physical address backed by PBL element. Address is referenced
321*4882a593Smuzhiyun  * by linear byte offset into list of variably sized PB elements.
322*4882a593Smuzhiyun  * Optionally, provides remaining len within current element, and
323*4882a593Smuzhiyun  * current PBL index for later resume at same element.
324*4882a593Smuzhiyun  */
siw_pbl_get_buffer(struct siw_pbl * pbl,u64 off,int * len,int * idx)325*4882a593Smuzhiyun dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
326*4882a593Smuzhiyun {
327*4882a593Smuzhiyun 	int i = idx ? *idx : 0;
328*4882a593Smuzhiyun 
329*4882a593Smuzhiyun 	while (i < pbl->num_buf) {
330*4882a593Smuzhiyun 		struct siw_pble *pble = &pbl->pbe[i];
331*4882a593Smuzhiyun 
332*4882a593Smuzhiyun 		if (pble->pbl_off + pble->size > off) {
333*4882a593Smuzhiyun 			u64 pble_off = off - pble->pbl_off;
334*4882a593Smuzhiyun 
335*4882a593Smuzhiyun 			if (len)
336*4882a593Smuzhiyun 				*len = pble->size - pble_off;
337*4882a593Smuzhiyun 			if (idx)
338*4882a593Smuzhiyun 				*idx = i;
339*4882a593Smuzhiyun 
340*4882a593Smuzhiyun 			return pble->addr + pble_off;
341*4882a593Smuzhiyun 		}
342*4882a593Smuzhiyun 		i++;
343*4882a593Smuzhiyun 	}
344*4882a593Smuzhiyun 	if (len)
345*4882a593Smuzhiyun 		*len = 0;
346*4882a593Smuzhiyun 	return 0;
347*4882a593Smuzhiyun }
348*4882a593Smuzhiyun 
/*
 * siw_pbl_alloc()
 *
 * Allocate a zeroed physical buffer list with room for num_buf
 * elements. Returns the new list or an ERR_PTR on failure.
 */
struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
	struct siw_pbl *pbl;

	if (!num_buf)
		return ERR_PTR(-EINVAL);

	/* struct_size() guards the flexible-array size computation. */
	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
	if (!pbl)
		return ERR_PTR(-ENOMEM);

	pbl->max_buf = num_buf;
	return pbl;
}
364*4882a593Smuzhiyun 
/*
 * siw_umem_get()
 *
 * Pin the user memory range [start, start + len) and build a
 * chunked page list describing it. Takes an mm reference
 * (mmgrab) that is dropped again in siw_umem_release(), and
 * accounts all pinned pages against the RLIMIT_MEMLOCK based
 * pinned_vm counter.
 *
 * Returns the new siw_umem or an ERR_PTR:
 *  -EPERM  if the caller may not mlock memory,
 *  -EINVAL for zero length,
 *  -ENOMEM on allocation failure or pinned_vm limit overflow,
 *  or the negative error from pin_user_pages().
 */
struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
	struct siw_umem *umem;
	struct mm_struct *mm_s;
	u64 first_page_va;
	unsigned long mlock_limit;
	unsigned int foll_flags = FOLL_WRITE;
	int num_pages, num_chunks, i, rv = 0;

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	if (!len)
		return ERR_PTR(-EINVAL);

	/* Page-align the range and size the chunk array. */
	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	mm_s = current->mm;
	umem->owning_mm = mm_s;
	umem->writable = writable;

	/* Paired with mmdrop() in siw_umem_release(). */
	mmgrab(mm_s);

	if (!writable)
		foll_flags |= FOLL_FORCE;

	mmap_read_lock(mm_s);

	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	/* Enforce the memlock limit against already pinned memory. */
	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	umem->fp_addr = first_page_va;

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	/* Pin pages chunk by chunk, up to PAGES_PER_CHUNK per chunk. */
	for (i = 0; num_pages; i++) {
		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);

		umem->page_chunk[i].plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
		if (!umem->page_chunk[i].plist) {
			rv = -ENOMEM;
			goto out_sem_up;
		}
		got = 0;
		while (nents) {
			struct page **plist = &umem->page_chunk[i].plist[got];

			/*
			 * pin_user_pages() may pin fewer than nents pages;
			 * loop until the chunk is complete or an error
			 * occurs. Accounting is updated incrementally so
			 * a partial pin unwinds correctly on release.
			 * NOTE(review): assumes pin_user_pages() never
			 * returns 0 for nents > 0 — verify for target
			 * kernel version.
			 */
			rv = pin_user_pages(first_page_va, nents,
					    foll_flags | FOLL_LONGTERM,
					    plist, NULL);
			if (rv < 0)
				goto out_sem_up;

			umem->num_pages += rv;
			atomic64_add(rv, &mm_s->pinned_vm);
			first_page_va += rv * PAGE_SIZE;
			nents -= rv;
			got += rv;
		}
		num_pages -= got;
	}
out_sem_up:
	mmap_read_unlock(mm_s);

	/* On full success, rv holds the (positive) last pin count. */
	if (rv > 0)
		return umem;

	/* Error: release whatever was pinned/allocated so far. */
	siw_umem_release(umem, false);

	return ERR_PTR(rv);
}
450