xref: /OK3568_Linux_fs/kernel/net/rds/message.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * Copyright (c) 2006, 2020 Oracle and/or its affiliates.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/errqueue.h>

#include "rds.h"

static unsigned int	rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_NONE]	= 0,
[RDS_EXTHDR_VERSION]	= sizeof(struct rds_ext_header_version),
[RDS_EXTHDR_RDMA]	= sizeof(struct rds_ext_header_rdma),
[RDS_EXTHDR_RDMA_DEST]	= sizeof(struct rds_ext_header_rdma_dest),
[RDS_EXTHDR_NPATHS]	= sizeof(u16),
[RDS_EXTHDR_GEN_NUM]	= sizeof(u32),
};

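/*
 * Wire-format sketch (derived from rds_message_add_extension() below;
 * illustrative only): h_exthdr carries a type byte followed by
 * rds_exthdr_size[type] payload bytes, terminated by RDS_EXTHDR_NONE.
 * An RDS_EXTHDR_NPATHS extension, for instance, occupies four bytes:
 *
 *	h_exthdr[0]    = RDS_EXTHDR_NPATHS;	(type byte)
 *	h_exthdr[1..2] = path count, u16	(payload)
 *	h_exthdr[3]    = RDS_EXTHDR_NONE;	(terminator)
 */
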
void rds_message_addref(struct rds_message *rm)
{
	rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
	refcount_inc(&rm->m_refcount);
}
EXPORT_SYMBOL_GPL(rds_message_addref);

static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
{
	struct rds_zcopy_cookies *ck = &info->zcookies;
	int ncookies = ck->num;

	if (ncookies == RDS_MAX_ZCOOKIES)
		return false;
	ck->cookies[ncookies] = cookie;
	ck->num = ++ncookies;
	return true;
}

static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
{
	return container_of(znotif, struct rds_msg_zcopy_info, znotif);
}

void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
{
	unsigned long flags;
	LIST_HEAD(copy);
	struct rds_msg_zcopy_info *info, *tmp;

	spin_lock_irqsave(&q->lock, flags);
	list_splice(&q->zcookie_head, &copy);
	INIT_LIST_HEAD(&q->zcookie_head);
	spin_unlock_irqrestore(&q->lock, flags);

	list_for_each_entry_safe(info, tmp, &copy, rs_zcookie_next) {
		list_del(&info->rs_zcookie_next);
		kfree(info);
	}
}

static void rds_rm_zerocopy_callback(struct rds_sock *rs,
				     struct rds_znotifier *znotif)
{
	struct rds_msg_zcopy_info *info;
	struct rds_msg_zcopy_queue *q;
	u32 cookie = znotif->z_cookie;
	struct rds_zcopy_cookies *ck;
	struct list_head *head;
	unsigned long flags;

	mm_unaccount_pinned_pages(&znotif->z_mmp);
	q = &rs->rs_zcookie_queue;
	spin_lock_irqsave(&q->lock, flags);
	head = &q->zcookie_head;
	if (!list_empty(head)) {
		/* Try to batch the cookie into the first queued entry.
		 * list_first_entry() is required here; list_entry() on the
		 * head itself would compute a bogus container pointer.
		 */
		info = list_first_entry(head, struct rds_msg_zcopy_info,
					rs_zcookie_next);
		if (rds_zcookie_add(info, cookie)) {
			spin_unlock_irqrestore(&q->lock, flags);
			kfree(rds_info_from_znotifier(znotif));
			/* caller invokes rds_wake_sk_sleep() */
			return;
		}
	}

	/* Queue empty, or the first entry's cookie array is full: reuse
	 * this znotifier's containing info as a fresh cookie batch.
	 */
	info = rds_info_from_znotifier(znotif);
	ck = &info->zcookies;
	memset(ck, 0, sizeof(*ck));
	WARN_ON(!rds_zcookie_add(info, cookie));
	/* list_add_tail(new, head): append the new batch to the queue */
	list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);

	spin_unlock_irqrestore(&q->lock, flags);
	/* caller invokes rds_wake_sk_sleep() */
}

/*
 * This relies on dma_map_sg() not touching sg[].page during merging.
 */
static void rds_message_purge(struct rds_message *rm)
{
	unsigned long i, flags;
	bool zcopy = false;

	if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
		return;

	spin_lock_irqsave(&rm->m_rs_lock, flags);
	if (rm->m_rs) {
		struct rds_sock *rs = rm->m_rs;

		if (rm->data.op_mmp_znotifier) {
			zcopy = true;
			rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
			rds_wake_sk_sleep(rs);
			rm->data.op_mmp_znotifier = NULL;
		}
		sock_put(rds_rs_to_sk(rs));
		rm->m_rs = NULL;
	}
	spin_unlock_irqrestore(&rm->m_rs_lock, flags);

	for (i = 0; i < rm->data.op_nents; i++) {
		/* XXX will have to put_page for page refs */
		if (!zcopy)
			__free_page(sg_page(&rm->data.op_sg[i]));
		else
			put_page(sg_page(&rm->data.op_sg[i]));
	}
	rm->data.op_nents = 0;

	if (rm->rdma.op_active)
		rds_rdma_free_op(&rm->rdma);
	if (rm->rdma.op_rdma_mr)
		kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final);

	if (rm->atomic.op_active)
		rds_atomic_free_op(&rm->atomic);
	if (rm->atomic.op_rdma_mr)
		kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final);
}

void rds_message_put(struct rds_message *rm)
{
	rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
	WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm);
	if (refcount_dec_and_test(&rm->m_refcount)) {
		BUG_ON(!list_empty(&rm->m_sock_item));
		BUG_ON(!list_empty(&rm->m_conn_item));
		rds_message_purge(rm);

		kfree(rm);
	}
}
EXPORT_SYMBOL_GPL(rds_message_put);

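/*
 * Lifetime sketch, illustrative only: queue_for_transmit() is a
 * hypothetical transport hook, not a function in this tree. A transport
 * holding a message across an asynchronous send takes its own reference so
 * the message cannot be purged while the wire still uses its pages:
 *
 *	rds_message_addref(rm);
 *	queue_for_transmit(rm);
 *	...
 *	rds_message_put(rm);	(completion path; may free rm via rds_message_purge())
 */
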
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
				 __be16 dport, u64 seq)
{
	hdr->h_flags = 0;
	hdr->h_sport = sport;
	hdr->h_dport = dport;
	hdr->h_sequence = cpu_to_be64(seq);
	hdr->h_exthdr[0] = RDS_EXTHDR_NONE;
}
EXPORT_SYMBOL_GPL(rds_message_populate_header);

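/*
 * Usage sketch, modeled on the sendmsg() path (rs and dport are assumed to
 * come from the sending socket and the destination address):
 *
 *	rds_message_populate_header(&rm->m_inc.i_hdr, rs->rs_bound_port,
 *				    dport, 0);
 */
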
int rds_message_add_extension(struct rds_header *hdr, unsigned int type,
			      const void *data, unsigned int len)
{
	unsigned int ext_len = sizeof(u8) + len;
	unsigned char *dst;

	/* For now, refuse to add more than one extension header */
	if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE)
		return 0;

	if (type >= __RDS_EXTHDR_MAX || len != rds_exthdr_size[type])
		return 0;

	if (ext_len >= RDS_HEADER_EXT_SPACE)
		return 0;
	dst = hdr->h_exthdr;

	*dst++ = type;
	memcpy(dst, data, len);

	dst[len] = RDS_EXTHDR_NONE;
	return 1;
}
EXPORT_SYMBOL_GPL(rds_message_add_extension);

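/*
 * Minimal sketch of adding one extension, modeled on how the send path
 * advertises its path count (illustrative only). The payload length must
 * match rds_exthdr_size[type] exactly, and only the first extension added
 * to a header is accepted:
 *
 *	__be16 npaths = cpu_to_be16(RDS_MPATH_WORKERS);
 *
 *	rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_NPATHS,
 *				  &npaths, sizeof(npaths));
 */
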
/*
 * If a message has extension headers, retrieve them here.
 * Call like this:
 *
 * unsigned int pos = 0;
 *
 * while (1) {
 *	buflen = sizeof(buffer);
 *	type = rds_message_next_extension(hdr, &pos, buffer, &buflen);
 *	if (type == RDS_EXTHDR_NONE)
 *		break;
 *	...
 * }
 */
int rds_message_next_extension(struct rds_header *hdr,
		unsigned int *pos, void *buf, unsigned int *buflen)
{
	unsigned int offset, ext_type, ext_len;
	u8 *src = hdr->h_exthdr;

	offset = *pos;
	if (offset >= RDS_HEADER_EXT_SPACE)
		goto none;

	/* Get the extension type and length. For now, the
	 * length is implied by the extension type. */
	ext_type = src[offset++];

	if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX)
		goto none;
	ext_len = rds_exthdr_size[ext_type];
	if (offset + ext_len > RDS_HEADER_EXT_SPACE)
		goto none;

	*pos = offset + ext_len;
	if (ext_len < *buflen)
		*buflen = ext_len;
	memcpy(buf, src + offset, *buflen);
	return ext_type;

none:
	*pos = RDS_HEADER_EXT_SPACE;
	*buflen = 0;
	return RDS_EXTHDR_NONE;
}

int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset)
{
	struct rds_ext_header_rdma_dest ext_hdr;

	ext_hdr.h_rdma_rkey = cpu_to_be32(r_key);
	ext_hdr.h_rdma_offset = cpu_to_be32(offset);
	return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr));
}
EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);

/*
 * Each rds_message is allocated with extra space for the scatterlist entries
 * that rds ops will need. This keeps the number of memory allocations down:
 * each rds op can then grab SGs from the message's pool when initializing
 * its part of the rds_message.
 */
struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
{
	struct rds_message *rm;

	if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message))
		return NULL;

	rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
	if (!rm)
		goto out;

	rm->m_used_sgs = 0;
	rm->m_total_sgs = extra_len / sizeof(struct scatterlist);

	refcount_set(&rm->m_refcount, 1);
	INIT_LIST_HEAD(&rm->m_sock_item);
	INIT_LIST_HEAD(&rm->m_conn_item);
	spin_lock_init(&rm->m_rs_lock);
	init_waitqueue_head(&rm->m_flush_wait);

out:
	return rm;
}

/*
 * RDS ops use this to grab SG entries from the rm's sg pool.
 */
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
{
	struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
	struct scatterlist *sg_ret;

	if (nents <= 0) {
		pr_warn("rds: alloc sgs failed! nents <= 0\n");
		return ERR_PTR(-EINVAL);
	}

	if (rm->m_used_sgs + nents > rm->m_total_sgs) {
		pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n",
			rm->m_total_sgs, rm->m_used_sgs, nents);
		return ERR_PTR(-ENOMEM);
	}

	sg_ret = &sg_first[rm->m_used_sgs];
	sg_init_table(sg_ret, nents);
	rm->m_used_sgs += nents;

	return sg_ret;
}

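/*
 * Allocation sketch, mirroring the pattern rds_message_map_pages() uses
 * below: size the trailing SG pool when allocating the message, then carve
 * per-op entries out of it:
 *
 *	int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE);
 *	struct rds_message *rm;
 *
 *	rm = rds_message_alloc(num_sgs * sizeof(struct scatterlist), GFP_KERNEL);
 *	if (!rm)
 *		return ERR_PTR(-ENOMEM);
 *	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
 *	if (IS_ERR(rm->data.op_sg)) {
 *		void *err = ERR_CAST(rm->data.op_sg);
 *
 *		rds_message_put(rm);
 *		return err;
 *	}
 */
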
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
{
	struct rds_message *rm;
	unsigned int i;
	int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE);
	int extra_bytes = num_sgs * sizeof(struct scatterlist);

	rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
	if (!rm)
		return ERR_PTR(-ENOMEM);

	set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
	rm->data.op_nents = num_sgs;
	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
	if (IS_ERR(rm->data.op_sg)) {
		void *err = ERR_CAST(rm->data.op_sg);

		rds_message_put(rm);
		return err;
	}

	for (i = 0; i < rm->data.op_nents; ++i) {
		sg_set_page(&rm->data.op_sg[i],
				virt_to_page(page_addrs[i]),
				PAGE_SIZE, 0);
	}

	return rm;
}

static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
{
	struct scatterlist *sg;
	int ret = 0;
	int length = iov_iter_count(from);
	int total_copied = 0;
	struct rds_msg_zcopy_info *info;

	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));

	/*
	 * now allocate and copy in the data payload.
	 */
	sg = rm->data.op_sg;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	INIT_LIST_HEAD(&info->rs_zcookie_next);
	rm->data.op_mmp_znotifier = &info->znotif;
	if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
				    length)) {
		ret = -ENOMEM;
		goto err;
	}
	while (iov_iter_count(from)) {
		struct page *pages;
		size_t start;
		ssize_t copied;

		copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
					    1, &start);
		if (copied < 0) {
			struct mmpin *mmp;
			int i;

			for (i = 0; i < rm->data.op_nents; i++)
				put_page(sg_page(&rm->data.op_sg[i]));
			mmp = &rm->data.op_mmp_znotifier->z_mmp;
			mm_unaccount_pinned_pages(mmp);
			ret = -EFAULT;
			goto err;
		}
		total_copied += copied;
		iov_iter_advance(from, copied);
		length -= copied;
		sg_set_page(sg, pages, copied, start);
		rm->data.op_nents++;
		sg++;
	}
	WARN_ON_ONCE(length != 0);
	return ret;
err:
	kfree(info);
	rm->data.op_mmp_znotifier = NULL;
	return ret;
}

int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
			       bool zcopy)
{
	unsigned long to_copy, nbytes;
	unsigned long sg_off;
	struct scatterlist *sg;
	int ret = 0;

	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));

	/* now allocate and copy in the data payload.  */
	sg = rm->data.op_sg;
	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */

	if (zcopy)
		return rds_message_zcopy_from_user(rm, from);

	while (iov_iter_count(from)) {
		if (!sg_page(sg)) {
			ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
						       GFP_HIGHUSER);
			if (ret)
				return ret;
			rm->data.op_nents++;
			sg_off = 0;
		}

		to_copy = min_t(unsigned long, iov_iter_count(from),
				sg->length - sg_off);

		rds_stats_add(s_copy_from_user, to_copy);
		nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off,
					     to_copy, from);
		if (nbytes != to_copy)
			return -EFAULT;

		sg_off += to_copy;

		if (sg_off == sg->length)
			sg++;
	}

	return ret;
}

int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
{
	struct rds_message *rm;
	struct scatterlist *sg;
	unsigned long to_copy;
	unsigned long vec_off;
	int copied;
	int ret;
	u32 len;

	rm = container_of(inc, struct rds_message, m_inc);
	len = be32_to_cpu(rm->m_inc.i_hdr.h_len);

	sg = rm->data.op_sg;
	vec_off = 0;
	copied = 0;

	while (iov_iter_count(to) && copied < len) {
		to_copy = min_t(unsigned long, iov_iter_count(to),
				sg->length - vec_off);
		to_copy = min_t(unsigned long, to_copy, len - copied);

		rds_stats_add(s_copy_to_user, to_copy);
		ret = copy_page_to_iter(sg_page(sg), sg->offset + vec_off,
					to_copy, to);
		if (ret != to_copy)
			return -EFAULT;

		vec_off += to_copy;
		copied += to_copy;

		if (vec_off == sg->length) {
			vec_off = 0;
			sg++;
		}
	}

	return copied;
}

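/*
 * Receive-side sketch, illustrative only: transports such as loopback wire
 * this helper up as their inc_copy_to_user hook, and the recvmsg() path
 * calls it with the user's iov:
 *
 *	int copied = rds_message_inc_copy_to_user(inc, &msg->msg_iter);
 *
 *	if (copied < 0)
 *		return copied;	(-EFAULT if the user memory was bad)
 */
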
/*
 * If the message is still on the send queue, wait until the transport
 * is done with it. This is particularly important for RDMA operations.
 */
void rds_message_wait(struct rds_message *rm)
{
	wait_event_interruptible(rm->m_flush_wait,
			!test_bit(RDS_MSG_MAPPED, &rm->m_flags));
}

void rds_message_unmapped(struct rds_message *rm)
{
	clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
	wake_up_interruptible(&rm->m_flush_wait);
}
EXPORT_SYMBOL_GPL(rds_message_unmapped);
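
/*
 * Mapping-flag sketch, illustrative only: post_to_hardware() is a
 * hypothetical transport send, not a function in this tree. A transport
 * sets RDS_MSG_MAPPED while a message's pages are posted to hardware;
 * rds_message_wait() above then blocks until the transport's completion
 * handler calls rds_message_unmapped():
 *
 *	set_bit(RDS_MSG_MAPPED, &rm->m_flags);
 *	post_to_hardware(rm);
 *	...
 *	rds_message_unmapped(rm);	(wakes rds_message_wait())
 */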