xref: /OK3568_Linux_fs/kernel/drivers/nvme/target/rdma.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * NVMe over Fabrics RDMA target.
4*4882a593Smuzhiyun  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5*4882a593Smuzhiyun  */
6*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7*4882a593Smuzhiyun #include <linux/atomic.h>
8*4882a593Smuzhiyun #include <linux/ctype.h>
9*4882a593Smuzhiyun #include <linux/delay.h>
10*4882a593Smuzhiyun #include <linux/err.h>
11*4882a593Smuzhiyun #include <linux/init.h>
12*4882a593Smuzhiyun #include <linux/module.h>
13*4882a593Smuzhiyun #include <linux/nvme.h>
14*4882a593Smuzhiyun #include <linux/slab.h>
15*4882a593Smuzhiyun #include <linux/string.h>
16*4882a593Smuzhiyun #include <linux/wait.h>
17*4882a593Smuzhiyun #include <linux/inet.h>
18*4882a593Smuzhiyun #include <asm/unaligned.h>
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include <rdma/ib_verbs.h>
21*4882a593Smuzhiyun #include <rdma/rdma_cm.h>
22*4882a593Smuzhiyun #include <rdma/rw.h>
23*4882a593Smuzhiyun #include <rdma/ib_cm.h>
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun #include <linux/nvme-rdma.h>
26*4882a593Smuzhiyun #include "nvmet.h"
27*4882a593Smuzhiyun 
28*4882a593Smuzhiyun /*
29*4882a593Smuzhiyun  * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
30*4882a593Smuzhiyun  */
31*4882a593Smuzhiyun #define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
32*4882a593Smuzhiyun #define NVMET_RDMA_MAX_INLINE_SGE		4
33*4882a593Smuzhiyun #define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun /* Assume mpsmin == device_page_size == 4KB */
36*4882a593Smuzhiyun #define NVMET_RDMA_MAX_MDTS			8
37*4882a593Smuzhiyun #define NVMET_RDMA_MAX_METADATA_MDTS		5
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun struct nvmet_rdma_srq;
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun struct nvmet_rdma_cmd {
42*4882a593Smuzhiyun 	struct ib_sge		sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
43*4882a593Smuzhiyun 	struct ib_cqe		cqe;
44*4882a593Smuzhiyun 	struct ib_recv_wr	wr;
45*4882a593Smuzhiyun 	struct scatterlist	inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
46*4882a593Smuzhiyun 	struct nvme_command     *nvme_cmd;
47*4882a593Smuzhiyun 	struct nvmet_rdma_queue	*queue;
48*4882a593Smuzhiyun 	struct nvmet_rdma_srq   *nsrq;
49*4882a593Smuzhiyun };
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun enum {
52*4882a593Smuzhiyun 	NVMET_RDMA_REQ_INLINE_DATA	= (1 << 0),
53*4882a593Smuzhiyun 	NVMET_RDMA_REQ_INVALIDATE_RKEY	= (1 << 1),
54*4882a593Smuzhiyun };
55*4882a593Smuzhiyun 
56*4882a593Smuzhiyun struct nvmet_rdma_rsp {
57*4882a593Smuzhiyun 	struct ib_sge		send_sge;
58*4882a593Smuzhiyun 	struct ib_cqe		send_cqe;
59*4882a593Smuzhiyun 	struct ib_send_wr	send_wr;
60*4882a593Smuzhiyun 
61*4882a593Smuzhiyun 	struct nvmet_rdma_cmd	*cmd;
62*4882a593Smuzhiyun 	struct nvmet_rdma_queue	*queue;
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun 	struct ib_cqe		read_cqe;
65*4882a593Smuzhiyun 	struct ib_cqe		write_cqe;
66*4882a593Smuzhiyun 	struct rdma_rw_ctx	rw;
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun 	struct nvmet_req	req;
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun 	bool			allocated;
71*4882a593Smuzhiyun 	u8			n_rdma;
72*4882a593Smuzhiyun 	u32			flags;
73*4882a593Smuzhiyun 	u32			invalidate_rkey;
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun 	struct list_head	wait_list;
76*4882a593Smuzhiyun 	struct list_head	free_list;
77*4882a593Smuzhiyun };
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun enum nvmet_rdma_queue_state {
80*4882a593Smuzhiyun 	NVMET_RDMA_Q_CONNECTING,
81*4882a593Smuzhiyun 	NVMET_RDMA_Q_LIVE,
82*4882a593Smuzhiyun 	NVMET_RDMA_Q_DISCONNECTING,
83*4882a593Smuzhiyun };
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun struct nvmet_rdma_queue {
86*4882a593Smuzhiyun 	struct rdma_cm_id	*cm_id;
87*4882a593Smuzhiyun 	struct ib_qp		*qp;
88*4882a593Smuzhiyun 	struct nvmet_port	*port;
89*4882a593Smuzhiyun 	struct ib_cq		*cq;
90*4882a593Smuzhiyun 	atomic_t		sq_wr_avail;
91*4882a593Smuzhiyun 	struct nvmet_rdma_device *dev;
92*4882a593Smuzhiyun 	struct nvmet_rdma_srq   *nsrq;
93*4882a593Smuzhiyun 	spinlock_t		state_lock;
94*4882a593Smuzhiyun 	enum nvmet_rdma_queue_state state;
95*4882a593Smuzhiyun 	struct nvmet_cq		nvme_cq;
96*4882a593Smuzhiyun 	struct nvmet_sq		nvme_sq;
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun 	struct nvmet_rdma_rsp	*rsps;
99*4882a593Smuzhiyun 	struct list_head	free_rsps;
100*4882a593Smuzhiyun 	spinlock_t		rsps_lock;
101*4882a593Smuzhiyun 	struct nvmet_rdma_cmd	*cmds;
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 	struct work_struct	release_work;
104*4882a593Smuzhiyun 	struct list_head	rsp_wait_list;
105*4882a593Smuzhiyun 	struct list_head	rsp_wr_wait_list;
106*4882a593Smuzhiyun 	spinlock_t		rsp_wr_wait_lock;
107*4882a593Smuzhiyun 
108*4882a593Smuzhiyun 	int			idx;
109*4882a593Smuzhiyun 	int			host_qid;
110*4882a593Smuzhiyun 	int			comp_vector;
111*4882a593Smuzhiyun 	int			recv_queue_size;
112*4882a593Smuzhiyun 	int			send_queue_size;
113*4882a593Smuzhiyun 
114*4882a593Smuzhiyun 	struct list_head	queue_list;
115*4882a593Smuzhiyun };
116*4882a593Smuzhiyun 
117*4882a593Smuzhiyun struct nvmet_rdma_port {
118*4882a593Smuzhiyun 	struct nvmet_port	*nport;
119*4882a593Smuzhiyun 	struct sockaddr_storage addr;
120*4882a593Smuzhiyun 	struct rdma_cm_id	*cm_id;
121*4882a593Smuzhiyun 	struct delayed_work	repair_work;
122*4882a593Smuzhiyun };
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun struct nvmet_rdma_srq {
125*4882a593Smuzhiyun 	struct ib_srq            *srq;
126*4882a593Smuzhiyun 	struct nvmet_rdma_cmd    *cmds;
127*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev;
128*4882a593Smuzhiyun };
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun struct nvmet_rdma_device {
131*4882a593Smuzhiyun 	struct ib_device	*device;
132*4882a593Smuzhiyun 	struct ib_pd		*pd;
133*4882a593Smuzhiyun 	struct nvmet_rdma_srq	**srqs;
134*4882a593Smuzhiyun 	int			srq_count;
135*4882a593Smuzhiyun 	size_t			srq_size;
136*4882a593Smuzhiyun 	struct kref		ref;
137*4882a593Smuzhiyun 	struct list_head	entry;
138*4882a593Smuzhiyun 	int			inline_data_size;
139*4882a593Smuzhiyun 	int			inline_page_count;
140*4882a593Smuzhiyun };
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun static bool nvmet_rdma_use_srq;
143*4882a593Smuzhiyun module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
144*4882a593Smuzhiyun MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun static int srq_size_set(const char *val, const struct kernel_param *kp);
147*4882a593Smuzhiyun static const struct kernel_param_ops srq_size_ops = {
148*4882a593Smuzhiyun 	.set = srq_size_set,
149*4882a593Smuzhiyun 	.get = param_get_int,
150*4882a593Smuzhiyun };
151*4882a593Smuzhiyun 
152*4882a593Smuzhiyun static int nvmet_rdma_srq_size = 1024;
153*4882a593Smuzhiyun module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
154*4882a593Smuzhiyun MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should be >= 256 (default: 1024)");
155*4882a593Smuzhiyun 
156*4882a593Smuzhiyun static DEFINE_IDA(nvmet_rdma_queue_ida);
157*4882a593Smuzhiyun static LIST_HEAD(nvmet_rdma_queue_list);
158*4882a593Smuzhiyun static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun static LIST_HEAD(device_list);
161*4882a593Smuzhiyun static DEFINE_MUTEX(device_list_mutex);
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
164*4882a593Smuzhiyun static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
165*4882a593Smuzhiyun static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
166*4882a593Smuzhiyun static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
167*4882a593Smuzhiyun static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
168*4882a593Smuzhiyun static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
169*4882a593Smuzhiyun static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
170*4882a593Smuzhiyun static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
171*4882a593Smuzhiyun 				struct nvmet_rdma_rsp *r);
172*4882a593Smuzhiyun static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
173*4882a593Smuzhiyun 				struct nvmet_rdma_rsp *r);
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun static const struct nvmet_fabrics_ops nvmet_rdma_ops;
176*4882a593Smuzhiyun 
177*4882a593Smuzhiyun static int srq_size_set(const char *val, const struct kernel_param *kp)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun 	int n = 0, ret;
180*4882a593Smuzhiyun 
181*4882a593Smuzhiyun 	ret = kstrtoint(val, 10, &n);
182*4882a593Smuzhiyun 	if (ret != 0 || n < 256)
183*4882a593Smuzhiyun 		return -EINVAL;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	return param_set_int(val, kp);
186*4882a593Smuzhiyun }
187*4882a593Smuzhiyun 
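/* Number of pages needed to hold len bytes (i.e. DIV_ROUND_UP(len, PAGE_SIZE)). */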
188*4882a593Smuzhiyun static int num_pages(int len)
189*4882a593Smuzhiyun {
190*4882a593Smuzhiyun 	return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
191*4882a593Smuzhiyun }
192*4882a593Smuzhiyun 
193*4882a593Smuzhiyun static inline bool nvmet_rdma_need_data_in(struct nvmet_rdma_rsp *rsp)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun 	return nvme_is_write(rsp->req.cmd) &&
196*4882a593Smuzhiyun 		rsp->req.transfer_len &&
197*4882a593Smuzhiyun 		!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
198*4882a593Smuzhiyun }
199*4882a593Smuzhiyun 
200*4882a593Smuzhiyun static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun 	return !nvme_is_write(rsp->req.cmd) &&
203*4882a593Smuzhiyun 		rsp->req.transfer_len &&
204*4882a593Smuzhiyun 		!rsp->req.cqe->status &&
205*4882a593Smuzhiyun 		!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
206*4882a593Smuzhiyun }
207*4882a593Smuzhiyun 
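/*
 * Take a response from the queue's pre-allocated free list. Under memory
 * pressure the list can run empty; in that case fall back to a dynamic
 * allocation and mark it ->allocated so nvmet_rdma_put_rsp() frees it.
 */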
208*4882a593Smuzhiyun static inline struct nvmet_rdma_rsp *
209*4882a593Smuzhiyun nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
210*4882a593Smuzhiyun {
211*4882a593Smuzhiyun 	struct nvmet_rdma_rsp *rsp;
212*4882a593Smuzhiyun 	unsigned long flags;
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	spin_lock_irqsave(&queue->rsps_lock, flags);
215*4882a593Smuzhiyun 	rsp = list_first_entry_or_null(&queue->free_rsps,
216*4882a593Smuzhiyun 				struct nvmet_rdma_rsp, free_list);
217*4882a593Smuzhiyun 	if (likely(rsp))
218*4882a593Smuzhiyun 		list_del(&rsp->free_list);
219*4882a593Smuzhiyun 	spin_unlock_irqrestore(&queue->rsps_lock, flags);
220*4882a593Smuzhiyun 
221*4882a593Smuzhiyun 	if (unlikely(!rsp)) {
222*4882a593Smuzhiyun 		int ret;
223*4882a593Smuzhiyun 
224*4882a593Smuzhiyun 		rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
225*4882a593Smuzhiyun 		if (unlikely(!rsp))
226*4882a593Smuzhiyun 			return NULL;
227*4882a593Smuzhiyun 		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
228*4882a593Smuzhiyun 		if (unlikely(ret)) {
229*4882a593Smuzhiyun 			kfree(rsp);
230*4882a593Smuzhiyun 			return NULL;
231*4882a593Smuzhiyun 		}
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 		rsp->allocated = true;
234*4882a593Smuzhiyun 	}
235*4882a593Smuzhiyun 
236*4882a593Smuzhiyun 	return rsp;
237*4882a593Smuzhiyun }
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun static inline void
240*4882a593Smuzhiyun nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
241*4882a593Smuzhiyun {
242*4882a593Smuzhiyun 	unsigned long flags;
243*4882a593Smuzhiyun 
244*4882a593Smuzhiyun 	if (unlikely(rsp->allocated)) {
245*4882a593Smuzhiyun 		nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
246*4882a593Smuzhiyun 		kfree(rsp);
247*4882a593Smuzhiyun 		return;
248*4882a593Smuzhiyun 	}
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
251*4882a593Smuzhiyun 	list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
252*4882a593Smuzhiyun 	spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
253*4882a593Smuzhiyun }
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
256*4882a593Smuzhiyun 				struct nvmet_rdma_cmd *c)
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun 	struct scatterlist *sg;
259*4882a593Smuzhiyun 	struct ib_sge *sge;
260*4882a593Smuzhiyun 	int i;
261*4882a593Smuzhiyun 
262*4882a593Smuzhiyun 	if (!ndev->inline_data_size)
263*4882a593Smuzhiyun 		return;
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	sg = c->inline_sg;
266*4882a593Smuzhiyun 	sge = &c->sge[1];
267*4882a593Smuzhiyun 
268*4882a593Smuzhiyun 	for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
269*4882a593Smuzhiyun 		if (sge->length)
270*4882a593Smuzhiyun 			ib_dma_unmap_page(ndev->device, sge->addr,
271*4882a593Smuzhiyun 					sge->length, DMA_FROM_DEVICE);
272*4882a593Smuzhiyun 		if (sg_page(sg))
273*4882a593Smuzhiyun 			__free_page(sg_page(sg));
274*4882a593Smuzhiyun 	}
275*4882a593Smuzhiyun }
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun static int nvmet_rdma_alloc_inline_pages(struct nvmet_rdma_device *ndev,
278*4882a593Smuzhiyun 				struct nvmet_rdma_cmd *c)
279*4882a593Smuzhiyun {
280*4882a593Smuzhiyun 	struct scatterlist *sg;
281*4882a593Smuzhiyun 	struct ib_sge *sge;
282*4882a593Smuzhiyun 	struct page *pg;
283*4882a593Smuzhiyun 	int len;
284*4882a593Smuzhiyun 	int i;
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun 	if (!ndev->inline_data_size)
287*4882a593Smuzhiyun 		return 0;
288*4882a593Smuzhiyun 
289*4882a593Smuzhiyun 	sg = c->inline_sg;
290*4882a593Smuzhiyun 	sg_init_table(sg, ndev->inline_page_count);
291*4882a593Smuzhiyun 	sge = &c->sge[1];
292*4882a593Smuzhiyun 	len = ndev->inline_data_size;
293*4882a593Smuzhiyun 
294*4882a593Smuzhiyun 	for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
295*4882a593Smuzhiyun 		pg = alloc_page(GFP_KERNEL);
296*4882a593Smuzhiyun 		if (!pg)
297*4882a593Smuzhiyun 			goto out_err;
298*4882a593Smuzhiyun 		sg_assign_page(sg, pg);
299*4882a593Smuzhiyun 		sge->addr = ib_dma_map_page(ndev->device,
300*4882a593Smuzhiyun 			pg, 0, PAGE_SIZE, DMA_FROM_DEVICE);
301*4882a593Smuzhiyun 		if (ib_dma_mapping_error(ndev->device, sge->addr))
302*4882a593Smuzhiyun 			goto out_err;
303*4882a593Smuzhiyun 		sge->length = min_t(int, len, PAGE_SIZE);
304*4882a593Smuzhiyun 		sge->lkey = ndev->pd->local_dma_lkey;
305*4882a593Smuzhiyun 		len -= sge->length;
306*4882a593Smuzhiyun 	}
307*4882a593Smuzhiyun 
308*4882a593Smuzhiyun 	return 0;
309*4882a593Smuzhiyun out_err:
310*4882a593Smuzhiyun 	for (; i >= 0; i--, sg--, sge--) {
311*4882a593Smuzhiyun 		if (sge->length)
312*4882a593Smuzhiyun 			ib_dma_unmap_page(ndev->device, sge->addr,
313*4882a593Smuzhiyun 					sge->length, DMA_FROM_DEVICE);
314*4882a593Smuzhiyun 		if (sg_page(sg))
315*4882a593Smuzhiyun 			__free_page(sg_page(sg));
316*4882a593Smuzhiyun 	}
317*4882a593Smuzhiyun 	return -ENOMEM;
318*4882a593Smuzhiyun }
319*4882a593Smuzhiyun 
320*4882a593Smuzhiyun static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
321*4882a593Smuzhiyun 			struct nvmet_rdma_cmd *c, bool admin)
322*4882a593Smuzhiyun {
323*4882a593Smuzhiyun 	/* NVMe command / RDMA RECV */
324*4882a593Smuzhiyun 	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
325*4882a593Smuzhiyun 	if (!c->nvme_cmd)
326*4882a593Smuzhiyun 		goto out;
327*4882a593Smuzhiyun 
328*4882a593Smuzhiyun 	c->sge[0].addr = ib_dma_map_single(ndev->device, c->nvme_cmd,
329*4882a593Smuzhiyun 			sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
330*4882a593Smuzhiyun 	if (ib_dma_mapping_error(ndev->device, c->sge[0].addr))
331*4882a593Smuzhiyun 		goto out_free_cmd;
332*4882a593Smuzhiyun 
333*4882a593Smuzhiyun 	c->sge[0].length = sizeof(*c->nvme_cmd);
334*4882a593Smuzhiyun 	c->sge[0].lkey = ndev->pd->local_dma_lkey;
335*4882a593Smuzhiyun 
336*4882a593Smuzhiyun 	if (!admin && nvmet_rdma_alloc_inline_pages(ndev, c))
337*4882a593Smuzhiyun 		goto out_unmap_cmd;
338*4882a593Smuzhiyun 
339*4882a593Smuzhiyun 	c->cqe.done = nvmet_rdma_recv_done;
340*4882a593Smuzhiyun 
341*4882a593Smuzhiyun 	c->wr.wr_cqe = &c->cqe;
342*4882a593Smuzhiyun 	c->wr.sg_list = c->sge;
343*4882a593Smuzhiyun 	c->wr.num_sge = admin ? 1 : ndev->inline_page_count + 1;
344*4882a593Smuzhiyun 
345*4882a593Smuzhiyun 	return 0;
346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun out_unmap_cmd:
348*4882a593Smuzhiyun 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
349*4882a593Smuzhiyun 			sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
350*4882a593Smuzhiyun out_free_cmd:
351*4882a593Smuzhiyun 	kfree(c->nvme_cmd);
352*4882a593Smuzhiyun 
353*4882a593Smuzhiyun out:
354*4882a593Smuzhiyun 	return -ENOMEM;
355*4882a593Smuzhiyun }
356*4882a593Smuzhiyun 
357*4882a593Smuzhiyun static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
358*4882a593Smuzhiyun 		struct nvmet_rdma_cmd *c, bool admin)
359*4882a593Smuzhiyun {
360*4882a593Smuzhiyun 	if (!admin)
361*4882a593Smuzhiyun 		nvmet_rdma_free_inline_pages(ndev, c);
362*4882a593Smuzhiyun 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
363*4882a593Smuzhiyun 				sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
364*4882a593Smuzhiyun 	kfree(c->nvme_cmd);
365*4882a593Smuzhiyun }
366*4882a593Smuzhiyun 
367*4882a593Smuzhiyun static struct nvmet_rdma_cmd *
368*4882a593Smuzhiyun nvmet_rdma_alloc_cmds(struct nvmet_rdma_device *ndev,
369*4882a593Smuzhiyun 		int nr_cmds, bool admin)
370*4882a593Smuzhiyun {
371*4882a593Smuzhiyun 	struct nvmet_rdma_cmd *cmds;
372*4882a593Smuzhiyun 	int ret = -EINVAL, i;
373*4882a593Smuzhiyun 
374*4882a593Smuzhiyun 	cmds = kcalloc(nr_cmds, sizeof(struct nvmet_rdma_cmd), GFP_KERNEL);
375*4882a593Smuzhiyun 	if (!cmds)
376*4882a593Smuzhiyun 		goto out;
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun 	for (i = 0; i < nr_cmds; i++) {
379*4882a593Smuzhiyun 		ret = nvmet_rdma_alloc_cmd(ndev, cmds + i, admin);
380*4882a593Smuzhiyun 		if (ret)
381*4882a593Smuzhiyun 			goto out_free;
382*4882a593Smuzhiyun 	}
383*4882a593Smuzhiyun 
384*4882a593Smuzhiyun 	return cmds;
385*4882a593Smuzhiyun 
386*4882a593Smuzhiyun out_free:
387*4882a593Smuzhiyun 	while (--i >= 0)
388*4882a593Smuzhiyun 		nvmet_rdma_free_cmd(ndev, cmds + i, admin);
389*4882a593Smuzhiyun 	kfree(cmds);
390*4882a593Smuzhiyun out:
391*4882a593Smuzhiyun 	return ERR_PTR(ret);
392*4882a593Smuzhiyun }
393*4882a593Smuzhiyun 
394*4882a593Smuzhiyun static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
395*4882a593Smuzhiyun 		struct nvmet_rdma_cmd *cmds, int nr_cmds, bool admin)
396*4882a593Smuzhiyun {
397*4882a593Smuzhiyun 	int i;
398*4882a593Smuzhiyun 
399*4882a593Smuzhiyun 	for (i = 0; i < nr_cmds; i++)
400*4882a593Smuzhiyun 		nvmet_rdma_free_cmd(ndev, cmds + i, admin);
401*4882a593Smuzhiyun 	kfree(cmds);
402*4882a593Smuzhiyun }
403*4882a593Smuzhiyun 
404*4882a593Smuzhiyun static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
405*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *r)
406*4882a593Smuzhiyun {
407*4882a593Smuzhiyun 	/* NVMe CQE / RDMA SEND */
408*4882a593Smuzhiyun 	r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
409*4882a593Smuzhiyun 	if (!r->req.cqe)
410*4882a593Smuzhiyun 		goto out;
411*4882a593Smuzhiyun 
412*4882a593Smuzhiyun 	r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe,
413*4882a593Smuzhiyun 			sizeof(*r->req.cqe), DMA_TO_DEVICE);
414*4882a593Smuzhiyun 	if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
415*4882a593Smuzhiyun 		goto out_free_rsp;
416*4882a593Smuzhiyun 
417*4882a593Smuzhiyun 	if (!ib_uses_virt_dma(ndev->device))
418*4882a593Smuzhiyun 		r->req.p2p_client = &ndev->device->dev;
419*4882a593Smuzhiyun 	r->send_sge.length = sizeof(*r->req.cqe);
420*4882a593Smuzhiyun 	r->send_sge.lkey = ndev->pd->local_dma_lkey;
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun 	r->send_cqe.done = nvmet_rdma_send_done;
423*4882a593Smuzhiyun 
424*4882a593Smuzhiyun 	r->send_wr.wr_cqe = &r->send_cqe;
425*4882a593Smuzhiyun 	r->send_wr.sg_list = &r->send_sge;
426*4882a593Smuzhiyun 	r->send_wr.num_sge = 1;
427*4882a593Smuzhiyun 	r->send_wr.send_flags = IB_SEND_SIGNALED;
428*4882a593Smuzhiyun 
429*4882a593Smuzhiyun 	/* Data In / RDMA READ */
430*4882a593Smuzhiyun 	r->read_cqe.done = nvmet_rdma_read_data_done;
431*4882a593Smuzhiyun 	/* Data Out / RDMA WRITE */
432*4882a593Smuzhiyun 	r->write_cqe.done = nvmet_rdma_write_data_done;
433*4882a593Smuzhiyun 
434*4882a593Smuzhiyun 	return 0;
435*4882a593Smuzhiyun 
436*4882a593Smuzhiyun out_free_rsp:
437*4882a593Smuzhiyun 	kfree(r->req.cqe);
438*4882a593Smuzhiyun out:
439*4882a593Smuzhiyun 	return -ENOMEM;
440*4882a593Smuzhiyun }
441*4882a593Smuzhiyun 
442*4882a593Smuzhiyun static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
443*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *r)
444*4882a593Smuzhiyun {
445*4882a593Smuzhiyun 	ib_dma_unmap_single(ndev->device, r->send_sge.addr,
446*4882a593Smuzhiyun 				sizeof(*r->req.cqe), DMA_TO_DEVICE);
447*4882a593Smuzhiyun 	kfree(r->req.cqe);
448*4882a593Smuzhiyun }
449*4882a593Smuzhiyun 
450*4882a593Smuzhiyun static int
451*4882a593Smuzhiyun nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
452*4882a593Smuzhiyun {
453*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev = queue->dev;
454*4882a593Smuzhiyun 	int nr_rsps = queue->recv_queue_size * 2;
455*4882a593Smuzhiyun 	int ret = -EINVAL, i;
456*4882a593Smuzhiyun 
457*4882a593Smuzhiyun 	queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
458*4882a593Smuzhiyun 			GFP_KERNEL);
459*4882a593Smuzhiyun 	if (!queue->rsps)
460*4882a593Smuzhiyun 		goto out;
461*4882a593Smuzhiyun 
462*4882a593Smuzhiyun 	for (i = 0; i < nr_rsps; i++) {
463*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
464*4882a593Smuzhiyun 
465*4882a593Smuzhiyun 		ret = nvmet_rdma_alloc_rsp(ndev, rsp);
466*4882a593Smuzhiyun 		if (ret)
467*4882a593Smuzhiyun 			goto out_free;
468*4882a593Smuzhiyun 
469*4882a593Smuzhiyun 		list_add_tail(&rsp->free_list, &queue->free_rsps);
470*4882a593Smuzhiyun 	}
471*4882a593Smuzhiyun 
472*4882a593Smuzhiyun 	return 0;
473*4882a593Smuzhiyun 
474*4882a593Smuzhiyun out_free:
475*4882a593Smuzhiyun 	while (--i >= 0) {
476*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
477*4882a593Smuzhiyun 
478*4882a593Smuzhiyun 		list_del(&rsp->free_list);
479*4882a593Smuzhiyun 		nvmet_rdma_free_rsp(ndev, rsp);
480*4882a593Smuzhiyun 	}
481*4882a593Smuzhiyun 	kfree(queue->rsps);
482*4882a593Smuzhiyun out:
483*4882a593Smuzhiyun 	return ret;
484*4882a593Smuzhiyun }
485*4882a593Smuzhiyun 
486*4882a593Smuzhiyun static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
487*4882a593Smuzhiyun {
488*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev = queue->dev;
489*4882a593Smuzhiyun 	int i, nr_rsps = queue->recv_queue_size * 2;
490*4882a593Smuzhiyun 
491*4882a593Smuzhiyun 	for (i = 0; i < nr_rsps; i++) {
492*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
493*4882a593Smuzhiyun 
494*4882a593Smuzhiyun 		list_del(&rsp->free_list);
495*4882a593Smuzhiyun 		nvmet_rdma_free_rsp(ndev, rsp);
496*4882a593Smuzhiyun 	}
497*4882a593Smuzhiyun 	kfree(queue->rsps);
498*4882a593Smuzhiyun }
499*4882a593Smuzhiyun 
500*4882a593Smuzhiyun static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
501*4882a593Smuzhiyun 		struct nvmet_rdma_cmd *cmd)
502*4882a593Smuzhiyun {
503*4882a593Smuzhiyun 	int ret;
504*4882a593Smuzhiyun 
505*4882a593Smuzhiyun 	ib_dma_sync_single_for_device(ndev->device,
506*4882a593Smuzhiyun 		cmd->sge[0].addr, cmd->sge[0].length,
507*4882a593Smuzhiyun 		DMA_FROM_DEVICE);
508*4882a593Smuzhiyun 
509*4882a593Smuzhiyun 	if (cmd->nsrq)
510*4882a593Smuzhiyun 		ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
511*4882a593Smuzhiyun 	else
512*4882a593Smuzhiyun 		ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
513*4882a593Smuzhiyun 
514*4882a593Smuzhiyun 	if (unlikely(ret))
515*4882a593Smuzhiyun 		pr_err("post_recv cmd failed\n");
516*4882a593Smuzhiyun 
517*4882a593Smuzhiyun 	return ret;
518*4882a593Smuzhiyun }
519*4882a593Smuzhiyun 
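/*
 * Retry commands that were deferred because the send queue had no free
 * work requests; stop at the first one that still cannot be executed.
 */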
520*4882a593Smuzhiyun static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
521*4882a593Smuzhiyun {
522*4882a593Smuzhiyun 	spin_lock(&queue->rsp_wr_wait_lock);
523*4882a593Smuzhiyun 	while (!list_empty(&queue->rsp_wr_wait_list)) {
524*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *rsp;
525*4882a593Smuzhiyun 		bool ret;
526*4882a593Smuzhiyun 
527*4882a593Smuzhiyun 		rsp = list_entry(queue->rsp_wr_wait_list.next,
528*4882a593Smuzhiyun 				struct nvmet_rdma_rsp, wait_list);
529*4882a593Smuzhiyun 		list_del(&rsp->wait_list);
530*4882a593Smuzhiyun 
531*4882a593Smuzhiyun 		spin_unlock(&queue->rsp_wr_wait_lock);
532*4882a593Smuzhiyun 		ret = nvmet_rdma_execute_command(rsp);
533*4882a593Smuzhiyun 		spin_lock(&queue->rsp_wr_wait_lock);
534*4882a593Smuzhiyun 
535*4882a593Smuzhiyun 		if (!ret) {
536*4882a593Smuzhiyun 			list_add(&rsp->wait_list, &queue->rsp_wr_wait_list);
537*4882a593Smuzhiyun 			break;
538*4882a593Smuzhiyun 		}
539*4882a593Smuzhiyun 	}
540*4882a593Smuzhiyun 	spin_unlock(&queue->rsp_wr_wait_lock);
541*4882a593Smuzhiyun }
542*4882a593Smuzhiyun 
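/*
 * Check the signature MR after a PI-protected transfer and map any
 * guard/reftag/apptag failure to the corresponding NVMe status code.
 */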
543*4882a593Smuzhiyun static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
544*4882a593Smuzhiyun {
545*4882a593Smuzhiyun 	struct ib_mr_status mr_status;
546*4882a593Smuzhiyun 	int ret;
547*4882a593Smuzhiyun 	u16 status = 0;
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
550*4882a593Smuzhiyun 	if (ret) {
551*4882a593Smuzhiyun 		pr_err("ib_check_mr_status failed, ret %d\n", ret);
552*4882a593Smuzhiyun 		return NVME_SC_INVALID_PI;
553*4882a593Smuzhiyun 	}
554*4882a593Smuzhiyun 
555*4882a593Smuzhiyun 	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
556*4882a593Smuzhiyun 		switch (mr_status.sig_err.err_type) {
557*4882a593Smuzhiyun 		case IB_SIG_BAD_GUARD:
558*4882a593Smuzhiyun 			status = NVME_SC_GUARD_CHECK;
559*4882a593Smuzhiyun 			break;
560*4882a593Smuzhiyun 		case IB_SIG_BAD_REFTAG:
561*4882a593Smuzhiyun 			status = NVME_SC_REFTAG_CHECK;
562*4882a593Smuzhiyun 			break;
563*4882a593Smuzhiyun 		case IB_SIG_BAD_APPTAG:
564*4882a593Smuzhiyun 			status = NVME_SC_APPTAG_CHECK;
565*4882a593Smuzhiyun 			break;
566*4882a593Smuzhiyun 		}
567*4882a593Smuzhiyun 		pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
568*4882a593Smuzhiyun 		       mr_status.sig_err.err_type,
569*4882a593Smuzhiyun 		       mr_status.sig_err.expected,
570*4882a593Smuzhiyun 		       mr_status.sig_err.actual);
571*4882a593Smuzhiyun 	}
572*4882a593Smuzhiyun 
573*4882a593Smuzhiyun 	return status;
574*4882a593Smuzhiyun }
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
577*4882a593Smuzhiyun 		struct nvme_command *cmd, struct ib_sig_domain *domain,
578*4882a593Smuzhiyun 		u16 control, u8 pi_type)
579*4882a593Smuzhiyun {
580*4882a593Smuzhiyun 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
581*4882a593Smuzhiyun 	domain->sig.dif.bg_type = IB_T10DIF_CRC;
582*4882a593Smuzhiyun 	domain->sig.dif.pi_interval = 1 << bi->interval_exp;
583*4882a593Smuzhiyun 	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
584*4882a593Smuzhiyun 	if (control & NVME_RW_PRINFO_PRCHK_REF)
585*4882a593Smuzhiyun 		domain->sig.dif.ref_remap = true;
586*4882a593Smuzhiyun 
587*4882a593Smuzhiyun 	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
588*4882a593Smuzhiyun 	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
589*4882a593Smuzhiyun 	domain->sig.dif.app_escape = true;
590*4882a593Smuzhiyun 	if (pi_type == NVME_NS_DPS_PI_TYPE3)
591*4882a593Smuzhiyun 		domain->sig.dif.ref_escape = true;
592*4882a593Smuzhiyun }
593*4882a593Smuzhiyun 
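/*
 * Build the T10-PI signature attributes for this command. With PRACT set
 * there is no wire domain (the HCA inserts/strips the PI); otherwise both
 * the wire and memory domains are configured.
 */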
594*4882a593Smuzhiyun static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
595*4882a593Smuzhiyun 				     struct ib_sig_attrs *sig_attrs)
596*4882a593Smuzhiyun {
597*4882a593Smuzhiyun 	struct nvme_command *cmd = req->cmd;
598*4882a593Smuzhiyun 	u16 control = le16_to_cpu(cmd->rw.control);
599*4882a593Smuzhiyun 	u8 pi_type = req->ns->pi_type;
600*4882a593Smuzhiyun 	struct blk_integrity *bi;
601*4882a593Smuzhiyun 
602*4882a593Smuzhiyun 	bi = bdev_get_integrity(req->ns->bdev);
603*4882a593Smuzhiyun 
604*4882a593Smuzhiyun 	memset(sig_attrs, 0, sizeof(*sig_attrs));
605*4882a593Smuzhiyun 
606*4882a593Smuzhiyun 	if (control & NVME_RW_PRINFO_PRACT) {
607*4882a593Smuzhiyun 		/* for WRITE_INSERT/READ_STRIP no wire domain */
608*4882a593Smuzhiyun 		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
609*4882a593Smuzhiyun 		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
610*4882a593Smuzhiyun 					  pi_type);
611*4882a593Smuzhiyun 		/* Clear the PRACT bit since HCA will generate/verify the PI */
612*4882a593Smuzhiyun 		control &= ~NVME_RW_PRINFO_PRACT;
613*4882a593Smuzhiyun 		cmd->rw.control = cpu_to_le16(control);
614*4882a593Smuzhiyun 		/* PI is added by the HW */
615*4882a593Smuzhiyun 		req->transfer_len += req->metadata_len;
616*4882a593Smuzhiyun 	} else {
617*4882a593Smuzhiyun 		/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
618*4882a593Smuzhiyun 		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
619*4882a593Smuzhiyun 					  pi_type);
620*4882a593Smuzhiyun 		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
621*4882a593Smuzhiyun 					  pi_type);
622*4882a593Smuzhiyun 	}
623*4882a593Smuzhiyun 
624*4882a593Smuzhiyun 	if (control & NVME_RW_PRINFO_PRCHK_REF)
625*4882a593Smuzhiyun 		sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
626*4882a593Smuzhiyun 	if (control & NVME_RW_PRINFO_PRCHK_GUARD)
627*4882a593Smuzhiyun 		sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
628*4882a593Smuzhiyun 	if (control & NVME_RW_PRINFO_PRCHK_APP)
629*4882a593Smuzhiyun 		sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
630*4882a593Smuzhiyun }
631*4882a593Smuzhiyun 
632*4882a593Smuzhiyun static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
633*4882a593Smuzhiyun 				  struct ib_sig_attrs *sig_attrs)
634*4882a593Smuzhiyun {
635*4882a593Smuzhiyun 	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
636*4882a593Smuzhiyun 	struct nvmet_req *req = &rsp->req;
637*4882a593Smuzhiyun 	int ret;
638*4882a593Smuzhiyun 
639*4882a593Smuzhiyun 	if (req->metadata_len)
640*4882a593Smuzhiyun 		ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
641*4882a593Smuzhiyun 			cm_id->port_num, req->sg, req->sg_cnt,
642*4882a593Smuzhiyun 			req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
643*4882a593Smuzhiyun 			addr, key, nvmet_data_dir(req));
644*4882a593Smuzhiyun 	else
645*4882a593Smuzhiyun 		ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
646*4882a593Smuzhiyun 				       req->sg, req->sg_cnt, 0, addr, key,
647*4882a593Smuzhiyun 				       nvmet_data_dir(req));
648*4882a593Smuzhiyun 
649*4882a593Smuzhiyun 	return ret;
650*4882a593Smuzhiyun }
651*4882a593Smuzhiyun 
652*4882a593Smuzhiyun static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
653*4882a593Smuzhiyun {
654*4882a593Smuzhiyun 	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
655*4882a593Smuzhiyun 	struct nvmet_req *req = &rsp->req;
656*4882a593Smuzhiyun 
657*4882a593Smuzhiyun 	if (req->metadata_len)
658*4882a593Smuzhiyun 		rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
659*4882a593Smuzhiyun 			cm_id->port_num, req->sg, req->sg_cnt,
660*4882a593Smuzhiyun 			req->metadata_sg, req->metadata_sg_cnt,
661*4882a593Smuzhiyun 			nvmet_data_dir(req));
662*4882a593Smuzhiyun 	else
663*4882a593Smuzhiyun 		rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
664*4882a593Smuzhiyun 				    req->sg, req->sg_cnt, nvmet_data_dir(req));
665*4882a593Smuzhiyun }
666*4882a593Smuzhiyun 
667*4882a593Smuzhiyun static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
668*4882a593Smuzhiyun {
669*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = rsp->queue;
670*4882a593Smuzhiyun 
671*4882a593Smuzhiyun 	atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
672*4882a593Smuzhiyun 
673*4882a593Smuzhiyun 	if (rsp->n_rdma)
674*4882a593Smuzhiyun 		nvmet_rdma_rw_ctx_destroy(rsp);
675*4882a593Smuzhiyun 
676*4882a593Smuzhiyun 	if (rsp->req.sg != rsp->cmd->inline_sg)
677*4882a593Smuzhiyun 		nvmet_req_free_sgls(&rsp->req);
678*4882a593Smuzhiyun 
679*4882a593Smuzhiyun 	if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
680*4882a593Smuzhiyun 		nvmet_rdma_process_wr_wait_list(queue);
681*4882a593Smuzhiyun 
682*4882a593Smuzhiyun 	nvmet_rdma_put_rsp(rsp);
683*4882a593Smuzhiyun }
684*4882a593Smuzhiyun 
685*4882a593Smuzhiyun static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)
686*4882a593Smuzhiyun {
687*4882a593Smuzhiyun 	if (queue->nvme_sq.ctrl) {
688*4882a593Smuzhiyun 		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
689*4882a593Smuzhiyun 	} else {
690*4882a593Smuzhiyun 		/*
691*4882a593Smuzhiyun 		 * We didn't set up the controller yet in case of an
692*4882a593Smuzhiyun 		 * admin connect error, so just disconnect and clean
693*4882a593Smuzhiyun 		 * up the queue.
694*4882a593Smuzhiyun 		 */
695*4882a593Smuzhiyun 		nvmet_rdma_queue_disconnect(queue);
696*4882a593Smuzhiyun 	}
697*4882a593Smuzhiyun }
698*4882a593Smuzhiyun 
699*4882a593Smuzhiyun static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
700*4882a593Smuzhiyun {
701*4882a593Smuzhiyun 	struct nvmet_rdma_rsp *rsp =
702*4882a593Smuzhiyun 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
703*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
704*4882a593Smuzhiyun 
705*4882a593Smuzhiyun 	nvmet_rdma_release_rsp(rsp);
706*4882a593Smuzhiyun 
707*4882a593Smuzhiyun 	if (unlikely(wc->status != IB_WC_SUCCESS &&
708*4882a593Smuzhiyun 		     wc->status != IB_WC_WR_FLUSH_ERR)) {
709*4882a593Smuzhiyun 		pr_err("SEND for CQE 0x%p failed with status %s (%d).\n",
710*4882a593Smuzhiyun 			wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
711*4882a593Smuzhiyun 		nvmet_rdma_error_comp(queue);
712*4882a593Smuzhiyun 	}
713*4882a593Smuzhiyun }
714*4882a593Smuzhiyun 
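/*
 * Post the NVMe completion. Any RDMA WRITE (Data-Out) work requests are
 * chained in front of the SEND, and SEND_WITH_INV is used when the host
 * requested remote invalidation of its rkey.
 */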
715*4882a593Smuzhiyun static void nvmet_rdma_queue_response(struct nvmet_req *req)
716*4882a593Smuzhiyun {
717*4882a593Smuzhiyun 	struct nvmet_rdma_rsp *rsp =
718*4882a593Smuzhiyun 		container_of(req, struct nvmet_rdma_rsp, req);
719*4882a593Smuzhiyun 	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
720*4882a593Smuzhiyun 	struct ib_send_wr *first_wr;
721*4882a593Smuzhiyun 
722*4882a593Smuzhiyun 	if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
723*4882a593Smuzhiyun 		rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
724*4882a593Smuzhiyun 		rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey;
725*4882a593Smuzhiyun 	} else {
726*4882a593Smuzhiyun 		rsp->send_wr.opcode = IB_WR_SEND;
727*4882a593Smuzhiyun 	}
728*4882a593Smuzhiyun 
729*4882a593Smuzhiyun 	if (nvmet_rdma_need_data_out(rsp)) {
730*4882a593Smuzhiyun 		if (rsp->req.metadata_len)
731*4882a593Smuzhiyun 			first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
732*4882a593Smuzhiyun 					cm_id->port_num, &rsp->write_cqe, NULL);
733*4882a593Smuzhiyun 		else
734*4882a593Smuzhiyun 			first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
735*4882a593Smuzhiyun 					cm_id->port_num, NULL, &rsp->send_wr);
736*4882a593Smuzhiyun 	} else {
737*4882a593Smuzhiyun 		first_wr = &rsp->send_wr;
738*4882a593Smuzhiyun 	}
739*4882a593Smuzhiyun 
740*4882a593Smuzhiyun 	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
741*4882a593Smuzhiyun 
742*4882a593Smuzhiyun 	ib_dma_sync_single_for_device(rsp->queue->dev->device,
743*4882a593Smuzhiyun 		rsp->send_sge.addr, rsp->send_sge.length,
744*4882a593Smuzhiyun 		DMA_TO_DEVICE);
745*4882a593Smuzhiyun 
746*4882a593Smuzhiyun 	if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
747*4882a593Smuzhiyun 		pr_err("sending cmd response failed\n");
748*4882a593Smuzhiyun 		nvmet_rdma_release_rsp(rsp);
749*4882a593Smuzhiyun 	}
750*4882a593Smuzhiyun }
751*4882a593Smuzhiyun 
752*4882a593Smuzhiyun static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
753*4882a593Smuzhiyun {
754*4882a593Smuzhiyun 	struct nvmet_rdma_rsp *rsp =
755*4882a593Smuzhiyun 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
756*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
757*4882a593Smuzhiyun 	u16 status = 0;
758*4882a593Smuzhiyun 
759*4882a593Smuzhiyun 	WARN_ON(rsp->n_rdma <= 0);
760*4882a593Smuzhiyun 	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
761*4882a593Smuzhiyun 	rsp->n_rdma = 0;
762*4882a593Smuzhiyun 
763*4882a593Smuzhiyun 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
764*4882a593Smuzhiyun 		nvmet_rdma_rw_ctx_destroy(rsp);
765*4882a593Smuzhiyun 		nvmet_req_uninit(&rsp->req);
766*4882a593Smuzhiyun 		nvmet_rdma_release_rsp(rsp);
767*4882a593Smuzhiyun 		if (wc->status != IB_WC_WR_FLUSH_ERR) {
768*4882a593Smuzhiyun 			pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
769*4882a593Smuzhiyun 				wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
770*4882a593Smuzhiyun 			nvmet_rdma_error_comp(queue);
771*4882a593Smuzhiyun 		}
772*4882a593Smuzhiyun 		return;
773*4882a593Smuzhiyun 	}
774*4882a593Smuzhiyun 
775*4882a593Smuzhiyun 	if (rsp->req.metadata_len)
776*4882a593Smuzhiyun 		status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
777*4882a593Smuzhiyun 	nvmet_rdma_rw_ctx_destroy(rsp);
778*4882a593Smuzhiyun 
779*4882a593Smuzhiyun 	if (unlikely(status))
780*4882a593Smuzhiyun 		nvmet_req_complete(&rsp->req, status);
781*4882a593Smuzhiyun 	else
782*4882a593Smuzhiyun 		rsp->req.execute(&rsp->req);
783*4882a593Smuzhiyun }
784*4882a593Smuzhiyun 
785*4882a593Smuzhiyun static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
786*4882a593Smuzhiyun {
787*4882a593Smuzhiyun 	struct nvmet_rdma_rsp *rsp =
788*4882a593Smuzhiyun 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
789*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
790*4882a593Smuzhiyun 	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
791*4882a593Smuzhiyun 	u16 status;
792*4882a593Smuzhiyun 
793*4882a593Smuzhiyun 	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
794*4882a593Smuzhiyun 		return;
795*4882a593Smuzhiyun 
796*4882a593Smuzhiyun 	WARN_ON(rsp->n_rdma <= 0);
797*4882a593Smuzhiyun 	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
798*4882a593Smuzhiyun 	rsp->n_rdma = 0;
799*4882a593Smuzhiyun 
800*4882a593Smuzhiyun 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
801*4882a593Smuzhiyun 		nvmet_rdma_rw_ctx_destroy(rsp);
802*4882a593Smuzhiyun 		nvmet_req_uninit(&rsp->req);
803*4882a593Smuzhiyun 		nvmet_rdma_release_rsp(rsp);
804*4882a593Smuzhiyun 		if (wc->status != IB_WC_WR_FLUSH_ERR) {
805*4882a593Smuzhiyun 			pr_info("RDMA WRITE for CQE failed with status %s (%d).\n",
806*4882a593Smuzhiyun 				ib_wc_status_msg(wc->status), wc->status);
807*4882a593Smuzhiyun 			nvmet_rdma_error_comp(queue);
808*4882a593Smuzhiyun 		}
809*4882a593Smuzhiyun 		return;
810*4882a593Smuzhiyun 	}
811*4882a593Smuzhiyun 
812*4882a593Smuzhiyun 	/*
813*4882a593Smuzhiyun 	 * Upon RDMA completion check the signature status
814*4882a593Smuzhiyun 	 * - if succeeded send good NVMe response
815*4882a593Smuzhiyun 	 * - if failed send bad NVMe response with appropriate error
816*4882a593Smuzhiyun 	 */
817*4882a593Smuzhiyun 	status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
818*4882a593Smuzhiyun 	if (unlikely(status))
819*4882a593Smuzhiyun 		rsp->req.cqe->status = cpu_to_le16(status << 1);
820*4882a593Smuzhiyun 	nvmet_rdma_rw_ctx_destroy(rsp);
821*4882a593Smuzhiyun 
822*4882a593Smuzhiyun 	if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
823*4882a593Smuzhiyun 		pr_err("sending cmd response failed\n");
824*4882a593Smuzhiyun 		nvmet_rdma_release_rsp(rsp);
825*4882a593Smuzhiyun 	}
826*4882a593Smuzhiyun }
827*4882a593Smuzhiyun 
828*4882a593Smuzhiyun static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
829*4882a593Smuzhiyun 		u64 off)
830*4882a593Smuzhiyun {
831*4882a593Smuzhiyun 	int sg_count = num_pages(len);
832*4882a593Smuzhiyun 	struct scatterlist *sg;
833*4882a593Smuzhiyun 	int i;
834*4882a593Smuzhiyun 
835*4882a593Smuzhiyun 	sg = rsp->cmd->inline_sg;
836*4882a593Smuzhiyun 	for (i = 0; i < sg_count; i++, sg++) {
837*4882a593Smuzhiyun 		if (i < sg_count - 1)
838*4882a593Smuzhiyun 			sg_unmark_end(sg);
839*4882a593Smuzhiyun 		else
840*4882a593Smuzhiyun 			sg_mark_end(sg);
841*4882a593Smuzhiyun 		sg->offset = off;
842*4882a593Smuzhiyun 		sg->length = min_t(int, len, PAGE_SIZE - off);
843*4882a593Smuzhiyun 		len -= sg->length;
844*4882a593Smuzhiyun 		if (!i)
845*4882a593Smuzhiyun 			off = 0;
846*4882a593Smuzhiyun 	}
847*4882a593Smuzhiyun 
848*4882a593Smuzhiyun 	rsp->req.sg = rsp->cmd->inline_sg;
849*4882a593Smuzhiyun 	rsp->req.sg_cnt = sg_count;
850*4882a593Smuzhiyun }
851*4882a593Smuzhiyun 
852*4882a593Smuzhiyun static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
853*4882a593Smuzhiyun {
854*4882a593Smuzhiyun 	struct nvme_sgl_desc *sgl = &rsp->req.cmd->common.dptr.sgl;
855*4882a593Smuzhiyun 	u64 off = le64_to_cpu(sgl->addr);
856*4882a593Smuzhiyun 	u32 len = le32_to_cpu(sgl->length);
857*4882a593Smuzhiyun 
858*4882a593Smuzhiyun 	if (!nvme_is_write(rsp->req.cmd)) {
859*4882a593Smuzhiyun 		rsp->req.error_loc =
860*4882a593Smuzhiyun 			offsetof(struct nvme_common_command, opcode);
861*4882a593Smuzhiyun 		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
862*4882a593Smuzhiyun 	}
863*4882a593Smuzhiyun 
864*4882a593Smuzhiyun 	if (off + len > rsp->queue->dev->inline_data_size) {
865*4882a593Smuzhiyun 		pr_err("invalid inline data offset!\n");
866*4882a593Smuzhiyun 		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
867*4882a593Smuzhiyun 	}
868*4882a593Smuzhiyun 
869*4882a593Smuzhiyun 	/* no data command? */
870*4882a593Smuzhiyun 	if (!len)
871*4882a593Smuzhiyun 		return 0;
872*4882a593Smuzhiyun 
873*4882a593Smuzhiyun 	nvmet_rdma_use_inline_sg(rsp, len, off);
874*4882a593Smuzhiyun 	rsp->flags |= NVMET_RDMA_REQ_INLINE_DATA;
875*4882a593Smuzhiyun 	rsp->req.transfer_len += len;
876*4882a593Smuzhiyun 	return 0;
877*4882a593Smuzhiyun }
878*4882a593Smuzhiyun 
879*4882a593Smuzhiyun static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
880*4882a593Smuzhiyun 		struct nvme_keyed_sgl_desc *sgl, bool invalidate)
881*4882a593Smuzhiyun {
882*4882a593Smuzhiyun 	u64 addr = le64_to_cpu(sgl->addr);
883*4882a593Smuzhiyun 	u32 key = get_unaligned_le32(sgl->key);
884*4882a593Smuzhiyun 	struct ib_sig_attrs sig_attrs;
885*4882a593Smuzhiyun 	int ret;
886*4882a593Smuzhiyun 
887*4882a593Smuzhiyun 	rsp->req.transfer_len = get_unaligned_le24(sgl->length);
888*4882a593Smuzhiyun 
889*4882a593Smuzhiyun 	/* no data command? */
890*4882a593Smuzhiyun 	if (!rsp->req.transfer_len)
891*4882a593Smuzhiyun 		return 0;
892*4882a593Smuzhiyun 
893*4882a593Smuzhiyun 	if (rsp->req.metadata_len)
894*4882a593Smuzhiyun 		nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
895*4882a593Smuzhiyun 
896*4882a593Smuzhiyun 	ret = nvmet_req_alloc_sgls(&rsp->req);
897*4882a593Smuzhiyun 	if (unlikely(ret < 0))
898*4882a593Smuzhiyun 		goto error_out;
899*4882a593Smuzhiyun 
900*4882a593Smuzhiyun 	ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
901*4882a593Smuzhiyun 	if (unlikely(ret < 0))
902*4882a593Smuzhiyun 		goto error_out;
903*4882a593Smuzhiyun 	rsp->n_rdma += ret;
904*4882a593Smuzhiyun 
905*4882a593Smuzhiyun 	if (invalidate) {
906*4882a593Smuzhiyun 		rsp->invalidate_rkey = key;
907*4882a593Smuzhiyun 		rsp->flags |= NVMET_RDMA_REQ_INVALIDATE_RKEY;
908*4882a593Smuzhiyun 	}
909*4882a593Smuzhiyun 
910*4882a593Smuzhiyun 	return 0;
911*4882a593Smuzhiyun 
912*4882a593Smuzhiyun error_out:
913*4882a593Smuzhiyun 	rsp->req.transfer_len = 0;
914*4882a593Smuzhiyun 	return NVME_SC_INTERNAL;
915*4882a593Smuzhiyun }
916*4882a593Smuzhiyun 
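/*
 * Dispatch on the command's SGL descriptor type: inline (in-capsule) data
 * vs. a keyed SGL describing host memory for RDMA READ/WRITE, with or
 * without remote invalidation.
 */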
917*4882a593Smuzhiyun static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
918*4882a593Smuzhiyun {
919*4882a593Smuzhiyun 	struct nvme_keyed_sgl_desc *sgl = &rsp->req.cmd->common.dptr.ksgl;
920*4882a593Smuzhiyun 
921*4882a593Smuzhiyun 	switch (sgl->type >> 4) {
922*4882a593Smuzhiyun 	case NVME_SGL_FMT_DATA_DESC:
923*4882a593Smuzhiyun 		switch (sgl->type & 0xf) {
924*4882a593Smuzhiyun 		case NVME_SGL_FMT_OFFSET:
925*4882a593Smuzhiyun 			return nvmet_rdma_map_sgl_inline(rsp);
926*4882a593Smuzhiyun 		default:
927*4882a593Smuzhiyun 			pr_err("invalid SGL subtype: %#x\n", sgl->type);
928*4882a593Smuzhiyun 			rsp->req.error_loc =
929*4882a593Smuzhiyun 				offsetof(struct nvme_common_command, dptr);
930*4882a593Smuzhiyun 			return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
931*4882a593Smuzhiyun 		}
932*4882a593Smuzhiyun 	case NVME_KEY_SGL_FMT_DATA_DESC:
933*4882a593Smuzhiyun 		switch (sgl->type & 0xf) {
934*4882a593Smuzhiyun 		case NVME_SGL_FMT_ADDRESS | NVME_SGL_FMT_INVALIDATE:
935*4882a593Smuzhiyun 			return nvmet_rdma_map_sgl_keyed(rsp, sgl, true);
936*4882a593Smuzhiyun 		case NVME_SGL_FMT_ADDRESS:
937*4882a593Smuzhiyun 			return nvmet_rdma_map_sgl_keyed(rsp, sgl, false);
938*4882a593Smuzhiyun 		default:
939*4882a593Smuzhiyun 			pr_err("invalid SGL subtype: %#x\n", sgl->type);
940*4882a593Smuzhiyun 			rsp->req.error_loc =
941*4882a593Smuzhiyun 				offsetof(struct nvme_common_command, dptr);
942*4882a593Smuzhiyun 			return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
943*4882a593Smuzhiyun 		}
944*4882a593Smuzhiyun 	default:
945*4882a593Smuzhiyun 		pr_err("invalid SGL type: %#x\n", sgl->type);
946*4882a593Smuzhiyun 		rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
947*4882a593Smuzhiyun 		return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR;
948*4882a593Smuzhiyun 	}
949*4882a593Smuzhiyun }
950*4882a593Smuzhiyun 
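/*
 * Reserve 1 + n_rdma send queue slots (the response SEND plus any RDMA
 * READ work requests). Returns false if the send queue is full so the
 * caller can park the command on the wr wait list.
 */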
951*4882a593Smuzhiyun static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
952*4882a593Smuzhiyun {
953*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = rsp->queue;
954*4882a593Smuzhiyun 
955*4882a593Smuzhiyun 	if (unlikely(atomic_sub_return(1 + rsp->n_rdma,
956*4882a593Smuzhiyun 			&queue->sq_wr_avail) < 0)) {
957*4882a593Smuzhiyun 		pr_debug("IB send queue full (needed %d): queue %u cntlid %u\n",
958*4882a593Smuzhiyun 				1 + rsp->n_rdma, queue->idx,
959*4882a593Smuzhiyun 				queue->nvme_sq.ctrl->cntlid);
960*4882a593Smuzhiyun 		atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
961*4882a593Smuzhiyun 		return false;
962*4882a593Smuzhiyun 	}
963*4882a593Smuzhiyun 
964*4882a593Smuzhiyun 	if (nvmet_rdma_need_data_in(rsp)) {
965*4882a593Smuzhiyun 		if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
966*4882a593Smuzhiyun 				queue->cm_id->port_num, &rsp->read_cqe, NULL))
967*4882a593Smuzhiyun 			nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
968*4882a593Smuzhiyun 	} else {
969*4882a593Smuzhiyun 		rsp->req.execute(&rsp->req);
970*4882a593Smuzhiyun 	}
971*4882a593Smuzhiyun 
972*4882a593Smuzhiyun 	return true;
973*4882a593Smuzhiyun }
974*4882a593Smuzhiyun 
975*4882a593Smuzhiyun static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
976*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *cmd)
977*4882a593Smuzhiyun {
978*4882a593Smuzhiyun 	u16 status;
979*4882a593Smuzhiyun 
980*4882a593Smuzhiyun 	ib_dma_sync_single_for_cpu(queue->dev->device,
981*4882a593Smuzhiyun 		cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
982*4882a593Smuzhiyun 		DMA_FROM_DEVICE);
983*4882a593Smuzhiyun 	ib_dma_sync_single_for_cpu(queue->dev->device,
984*4882a593Smuzhiyun 		cmd->send_sge.addr, cmd->send_sge.length,
985*4882a593Smuzhiyun 		DMA_TO_DEVICE);
986*4882a593Smuzhiyun 
987*4882a593Smuzhiyun 	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
988*4882a593Smuzhiyun 			&queue->nvme_sq, &nvmet_rdma_ops))
989*4882a593Smuzhiyun 		return;
990*4882a593Smuzhiyun 
991*4882a593Smuzhiyun 	status = nvmet_rdma_map_sgl(cmd);
992*4882a593Smuzhiyun 	if (status)
993*4882a593Smuzhiyun 		goto out_err;
994*4882a593Smuzhiyun 
995*4882a593Smuzhiyun 	if (unlikely(!nvmet_rdma_execute_command(cmd))) {
996*4882a593Smuzhiyun 		spin_lock(&queue->rsp_wr_wait_lock);
997*4882a593Smuzhiyun 		list_add_tail(&cmd->wait_list, &queue->rsp_wr_wait_list);
998*4882a593Smuzhiyun 		spin_unlock(&queue->rsp_wr_wait_lock);
999*4882a593Smuzhiyun 	}
1000*4882a593Smuzhiyun 
1001*4882a593Smuzhiyun 	return;
1002*4882a593Smuzhiyun 
1003*4882a593Smuzhiyun out_err:
1004*4882a593Smuzhiyun 	nvmet_req_complete(&cmd->req, status);
1005*4882a593Smuzhiyun }
1006*4882a593Smuzhiyun 
1007*4882a593Smuzhiyun static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1008*4882a593Smuzhiyun {
1009*4882a593Smuzhiyun 	struct nvmet_rdma_cmd *cmd =
1010*4882a593Smuzhiyun 		container_of(wc->wr_cqe, struct nvmet_rdma_cmd, cqe);
1011*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
1012*4882a593Smuzhiyun 	struct nvmet_rdma_rsp *rsp;
1013*4882a593Smuzhiyun 
1014*4882a593Smuzhiyun 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1015*4882a593Smuzhiyun 		if (wc->status != IB_WC_WR_FLUSH_ERR) {
1016*4882a593Smuzhiyun 			pr_err("RECV for CQE 0x%p failed with status %s (%d)\n",
1017*4882a593Smuzhiyun 				wc->wr_cqe, ib_wc_status_msg(wc->status),
1018*4882a593Smuzhiyun 				wc->status);
1019*4882a593Smuzhiyun 			nvmet_rdma_error_comp(queue);
1020*4882a593Smuzhiyun 		}
1021*4882a593Smuzhiyun 		return;
1022*4882a593Smuzhiyun 	}
1023*4882a593Smuzhiyun 
1024*4882a593Smuzhiyun 	if (unlikely(wc->byte_len < sizeof(struct nvme_command))) {
1025*4882a593Smuzhiyun 		pr_err("Ctrl Fatal Error: capsule size less than 64 bytes\n");
1026*4882a593Smuzhiyun 		nvmet_rdma_error_comp(queue);
1027*4882a593Smuzhiyun 		return;
1028*4882a593Smuzhiyun 	}
1029*4882a593Smuzhiyun 
1030*4882a593Smuzhiyun 	cmd->queue = queue;
1031*4882a593Smuzhiyun 	rsp = nvmet_rdma_get_rsp(queue);
1032*4882a593Smuzhiyun 	if (unlikely(!rsp)) {
1033*4882a593Smuzhiyun 		/*
1034*4882a593Smuzhiyun 		 * we get here only under memory pressure,
1035*4882a593Smuzhiyun 		 * silently drop and have the host retry
1036*4882a593Smuzhiyun 		 * as we can't even fail it.
1037*4882a593Smuzhiyun 		 */
1038*4882a593Smuzhiyun 		nvmet_rdma_post_recv(queue->dev, cmd);
1039*4882a593Smuzhiyun 		return;
1040*4882a593Smuzhiyun 	}
1041*4882a593Smuzhiyun 	rsp->queue = queue;
1042*4882a593Smuzhiyun 	rsp->cmd = cmd;
1043*4882a593Smuzhiyun 	rsp->flags = 0;
1044*4882a593Smuzhiyun 	rsp->req.cmd = cmd->nvme_cmd;
1045*4882a593Smuzhiyun 	rsp->req.port = queue->port;
1046*4882a593Smuzhiyun 	rsp->n_rdma = 0;
1047*4882a593Smuzhiyun 
1048*4882a593Smuzhiyun 	if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
1049*4882a593Smuzhiyun 		unsigned long flags;
1050*4882a593Smuzhiyun 
1051*4882a593Smuzhiyun 		spin_lock_irqsave(&queue->state_lock, flags);
1052*4882a593Smuzhiyun 		if (queue->state == NVMET_RDMA_Q_CONNECTING)
1053*4882a593Smuzhiyun 			list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
1054*4882a593Smuzhiyun 		else
1055*4882a593Smuzhiyun 			nvmet_rdma_put_rsp(rsp);
1056*4882a593Smuzhiyun 		spin_unlock_irqrestore(&queue->state_lock, flags);
1057*4882a593Smuzhiyun 		return;
1058*4882a593Smuzhiyun 	}
1059*4882a593Smuzhiyun 
1060*4882a593Smuzhiyun 	nvmet_rdma_handle_command(queue, rsp);
1061*4882a593Smuzhiyun }
1062*4882a593Smuzhiyun 
1063*4882a593Smuzhiyun static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
1064*4882a593Smuzhiyun {
1065*4882a593Smuzhiyun 	nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
1066*4882a593Smuzhiyun 			     false);
1067*4882a593Smuzhiyun 	ib_destroy_srq(nsrq->srq);
1068*4882a593Smuzhiyun 
1069*4882a593Smuzhiyun 	kfree(nsrq);
1070*4882a593Smuzhiyun }
1071*4882a593Smuzhiyun 
1072*4882a593Smuzhiyun static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
1073*4882a593Smuzhiyun {
1074*4882a593Smuzhiyun 	int i;
1075*4882a593Smuzhiyun 
1076*4882a593Smuzhiyun 	if (!ndev->srqs)
1077*4882a593Smuzhiyun 		return;
1078*4882a593Smuzhiyun 
1079*4882a593Smuzhiyun 	for (i = 0; i < ndev->srq_count; i++)
1080*4882a593Smuzhiyun 		nvmet_rdma_destroy_srq(ndev->srqs[i]);
1081*4882a593Smuzhiyun 
1082*4882a593Smuzhiyun 	kfree(ndev->srqs);
1083*4882a593Smuzhiyun }
1084*4882a593Smuzhiyun 
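/*
 * Create one SRQ, allocate its receive commands and pre-post a receive
 * work request for every SRQ slot.
 */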
1085*4882a593Smuzhiyun static struct nvmet_rdma_srq *
1086*4882a593Smuzhiyun nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
1087*4882a593Smuzhiyun {
1088*4882a593Smuzhiyun 	struct ib_srq_init_attr srq_attr = { NULL, };
1089*4882a593Smuzhiyun 	size_t srq_size = ndev->srq_size;
1090*4882a593Smuzhiyun 	struct nvmet_rdma_srq *nsrq;
1091*4882a593Smuzhiyun 	struct ib_srq *srq;
1092*4882a593Smuzhiyun 	int ret, i;
1093*4882a593Smuzhiyun 
1094*4882a593Smuzhiyun 	nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
1095*4882a593Smuzhiyun 	if (!nsrq)
1096*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
1097*4882a593Smuzhiyun 
1098*4882a593Smuzhiyun 	srq_attr.attr.max_wr = srq_size;
1099*4882a593Smuzhiyun 	srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
1100*4882a593Smuzhiyun 	srq_attr.attr.srq_limit = 0;
1101*4882a593Smuzhiyun 	srq_attr.srq_type = IB_SRQT_BASIC;
1102*4882a593Smuzhiyun 	srq = ib_create_srq(ndev->pd, &srq_attr);
1103*4882a593Smuzhiyun 	if (IS_ERR(srq)) {
1104*4882a593Smuzhiyun 		ret = PTR_ERR(srq);
1105*4882a593Smuzhiyun 		goto out_free;
1106*4882a593Smuzhiyun 	}
1107*4882a593Smuzhiyun 
1108*4882a593Smuzhiyun 	nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
1109*4882a593Smuzhiyun 	if (IS_ERR(nsrq->cmds)) {
1110*4882a593Smuzhiyun 		ret = PTR_ERR(nsrq->cmds);
1111*4882a593Smuzhiyun 		goto out_destroy_srq;
1112*4882a593Smuzhiyun 	}
1113*4882a593Smuzhiyun 
1114*4882a593Smuzhiyun 	nsrq->srq = srq;
1115*4882a593Smuzhiyun 	nsrq->ndev = ndev;
1116*4882a593Smuzhiyun 
1117*4882a593Smuzhiyun 	for (i = 0; i < srq_size; i++) {
1118*4882a593Smuzhiyun 		nsrq->cmds[i].nsrq = nsrq;
1119*4882a593Smuzhiyun 		ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
1120*4882a593Smuzhiyun 		if (ret)
1121*4882a593Smuzhiyun 			goto out_free_cmds;
1122*4882a593Smuzhiyun 	}
1123*4882a593Smuzhiyun 
1124*4882a593Smuzhiyun 	return nsrq;
1125*4882a593Smuzhiyun 
1126*4882a593Smuzhiyun out_free_cmds:
1127*4882a593Smuzhiyun 	nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
1128*4882a593Smuzhiyun out_destroy_srq:
1129*4882a593Smuzhiyun 	ib_destroy_srq(srq);
1130*4882a593Smuzhiyun out_free:
1131*4882a593Smuzhiyun 	kfree(nsrq);
1132*4882a593Smuzhiyun 	return ERR_PTR(ret);
1133*4882a593Smuzhiyun }
1134*4882a593Smuzhiyun 
1135*4882a593Smuzhiyun static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev)
1136*4882a593Smuzhiyun {
1137*4882a593Smuzhiyun 	int i, ret;
1138*4882a593Smuzhiyun 
1139*4882a593Smuzhiyun 	if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) {
1140*4882a593Smuzhiyun 		/*
1141*4882a593Smuzhiyun 		 * If SRQs aren't supported we just go ahead and use normal
1142*4882a593Smuzhiyun 		 * non-shared receive queues.
1143*4882a593Smuzhiyun 		 */
1144*4882a593Smuzhiyun 		pr_info("SRQ requested but not supported.\n");
1145*4882a593Smuzhiyun 		return 0;
1146*4882a593Smuzhiyun 	}
1147*4882a593Smuzhiyun 
1148*4882a593Smuzhiyun 	ndev->srq_size = min(ndev->device->attrs.max_srq_wr,
1149*4882a593Smuzhiyun 			     nvmet_rdma_srq_size);
1150*4882a593Smuzhiyun 	ndev->srq_count = min(ndev->device->num_comp_vectors,
1151*4882a593Smuzhiyun 			      ndev->device->attrs.max_srq);
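	/*
	 * Illustrative sizing (hypothetical device caps, not values from this
	 * file): with attrs.max_srq_wr = 4096, nvmet_rdma_srq_size = 1024,
	 * num_comp_vectors = 8 and attrs.max_srq = 64, this yields 8 SRQs of
	 * 1024 receive WRs each, roughly one SRQ per completion vector.
	 */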
1152*4882a593Smuzhiyun 
1153*4882a593Smuzhiyun 	ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL);
1154*4882a593Smuzhiyun 	if (!ndev->srqs)
1155*4882a593Smuzhiyun 		return -ENOMEM;
1156*4882a593Smuzhiyun 
1157*4882a593Smuzhiyun 	for (i = 0; i < ndev->srq_count; i++) {
1158*4882a593Smuzhiyun 		ndev->srqs[i] = nvmet_rdma_init_srq(ndev);
1159*4882a593Smuzhiyun 		if (IS_ERR(ndev->srqs[i])) {
1160*4882a593Smuzhiyun 			ret = PTR_ERR(ndev->srqs[i]);
1161*4882a593Smuzhiyun 			goto err_srq;
1162*4882a593Smuzhiyun 		}
1163*4882a593Smuzhiyun 	}
1164*4882a593Smuzhiyun 
1165*4882a593Smuzhiyun 	return 0;
1166*4882a593Smuzhiyun 
1167*4882a593Smuzhiyun err_srq:
1168*4882a593Smuzhiyun 	while (--i >= 0)
1169*4882a593Smuzhiyun 		nvmet_rdma_destroy_srq(ndev->srqs[i]);
1170*4882a593Smuzhiyun 	kfree(ndev->srqs);
1171*4882a593Smuzhiyun 	return ret;
1172*4882a593Smuzhiyun }
1173*4882a593Smuzhiyun 
1174*4882a593Smuzhiyun static void nvmet_rdma_free_dev(struct kref *ref)
1175*4882a593Smuzhiyun {
1176*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev =
1177*4882a593Smuzhiyun 		container_of(ref, struct nvmet_rdma_device, ref);
1178*4882a593Smuzhiyun 
1179*4882a593Smuzhiyun 	mutex_lock(&device_list_mutex);
1180*4882a593Smuzhiyun 	list_del(&ndev->entry);
1181*4882a593Smuzhiyun 	mutex_unlock(&device_list_mutex);
1182*4882a593Smuzhiyun 
1183*4882a593Smuzhiyun 	nvmet_rdma_destroy_srqs(ndev);
1184*4882a593Smuzhiyun 	ib_dealloc_pd(ndev->pd);
1185*4882a593Smuzhiyun 
1186*4882a593Smuzhiyun 	kfree(ndev);
1187*4882a593Smuzhiyun }
1188*4882a593Smuzhiyun 
1189*4882a593Smuzhiyun static struct nvmet_rdma_device *
1190*4882a593Smuzhiyun nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
1191*4882a593Smuzhiyun {
1192*4882a593Smuzhiyun 	struct nvmet_rdma_port *port = cm_id->context;
1193*4882a593Smuzhiyun 	struct nvmet_port *nport = port->nport;
1194*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev;
1195*4882a593Smuzhiyun 	int inline_page_count;
1196*4882a593Smuzhiyun 	int inline_sge_count;
1197*4882a593Smuzhiyun 	int ret;
1198*4882a593Smuzhiyun 
1199*4882a593Smuzhiyun 	mutex_lock(&device_list_mutex);
1200*4882a593Smuzhiyun 	list_for_each_entry(ndev, &device_list, entry) {
1201*4882a593Smuzhiyun 		if (ndev->device->node_guid == cm_id->device->node_guid &&
1202*4882a593Smuzhiyun 		    kref_get_unless_zero(&ndev->ref))
1203*4882a593Smuzhiyun 			goto out_unlock;
1204*4882a593Smuzhiyun 	}
1205*4882a593Smuzhiyun 
1206*4882a593Smuzhiyun 	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
1207*4882a593Smuzhiyun 	if (!ndev)
1208*4882a593Smuzhiyun 		goto out_err;
1209*4882a593Smuzhiyun 
1210*4882a593Smuzhiyun 	inline_page_count = num_pages(nport->inline_data_size);
1211*4882a593Smuzhiyun 	inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
1212*4882a593Smuzhiyun 				cm_id->device->attrs.max_recv_sge) - 1;
1213*4882a593Smuzhiyun 	if (inline_page_count > inline_sge_count) {
1214*4882a593Smuzhiyun 		pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
1215*4882a593Smuzhiyun 			nport->inline_data_size, cm_id->device->name,
1216*4882a593Smuzhiyun 			inline_sge_count * PAGE_SIZE);
1217*4882a593Smuzhiyun 		nport->inline_data_size = inline_sge_count * PAGE_SIZE;
1218*4882a593Smuzhiyun 		inline_page_count = inline_sge_count;
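	/*
	 * Worked example with hypothetical numbers (not from this file): with
	 * PAGE_SIZE = 4K and inline_data_size = 16K, inline_page_count is 4;
	 * if the device only supports max_recv_sge = 4 (i.e. 3 data SGEs),
	 * the inline data size is reduced to 3 * PAGE_SIZE = 12K.
	 */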
1219*4882a593Smuzhiyun 	}
1220*4882a593Smuzhiyun 	ndev->inline_data_size = nport->inline_data_size;
1221*4882a593Smuzhiyun 	ndev->inline_page_count = inline_page_count;
1222*4882a593Smuzhiyun 
1223*4882a593Smuzhiyun 	if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags &
1224*4882a593Smuzhiyun 				  IB_DEVICE_INTEGRITY_HANDOVER)) {
1225*4882a593Smuzhiyun 		pr_warn("T10-PI is not supported by device %s. Disabling it\n",
1226*4882a593Smuzhiyun 			cm_id->device->name);
1227*4882a593Smuzhiyun 		nport->pi_enable = false;
1228*4882a593Smuzhiyun 	}
1229*4882a593Smuzhiyun 
1230*4882a593Smuzhiyun 	ndev->device = cm_id->device;
1231*4882a593Smuzhiyun 	kref_init(&ndev->ref);
1232*4882a593Smuzhiyun 
1233*4882a593Smuzhiyun 	ndev->pd = ib_alloc_pd(ndev->device, 0);
1234*4882a593Smuzhiyun 	if (IS_ERR(ndev->pd))
1235*4882a593Smuzhiyun 		goto out_free_dev;
1236*4882a593Smuzhiyun 
1237*4882a593Smuzhiyun 	if (nvmet_rdma_use_srq) {
1238*4882a593Smuzhiyun 		ret = nvmet_rdma_init_srqs(ndev);
1239*4882a593Smuzhiyun 		if (ret)
1240*4882a593Smuzhiyun 			goto out_free_pd;
1241*4882a593Smuzhiyun 	}
1242*4882a593Smuzhiyun 
1243*4882a593Smuzhiyun 	list_add(&ndev->entry, &device_list);
1244*4882a593Smuzhiyun out_unlock:
1245*4882a593Smuzhiyun 	mutex_unlock(&device_list_mutex);
1246*4882a593Smuzhiyun 	pr_debug("added %s.\n", ndev->device->name);
1247*4882a593Smuzhiyun 	return ndev;
1248*4882a593Smuzhiyun 
1249*4882a593Smuzhiyun out_free_pd:
1250*4882a593Smuzhiyun 	ib_dealloc_pd(ndev->pd);
1251*4882a593Smuzhiyun out_free_dev:
1252*4882a593Smuzhiyun 	kfree(ndev);
1253*4882a593Smuzhiyun out_err:
1254*4882a593Smuzhiyun 	mutex_unlock(&device_list_mutex);
1255*4882a593Smuzhiyun 	return NULL;
1256*4882a593Smuzhiyun }
1257*4882a593Smuzhiyun 
1258*4882a593Smuzhiyun static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
1259*4882a593Smuzhiyun {
1260*4882a593Smuzhiyun 	struct ib_qp_init_attr qp_attr;
1261*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev = queue->dev;
1262*4882a593Smuzhiyun 	int nr_cqe, ret, i, factor;
1263*4882a593Smuzhiyun 
1264*4882a593Smuzhiyun 	/*
1265*4882a593Smuzhiyun 	 * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND.
1266*4882a593Smuzhiyun 	 */
1267*4882a593Smuzhiyun 	nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
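	/*
	 * Illustrative count (queue sizes are hypothetical): with a recv queue
	 * of 128 and a send queue of 128, nr_cqe = 128 + 2 * 128 = 384; the CQ
	 * below is sized for nr_cqe + 1 to leave a spare entry for the drain.
	 */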
1268*4882a593Smuzhiyun 
1269*4882a593Smuzhiyun 	queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1,
1270*4882a593Smuzhiyun 				   queue->comp_vector, IB_POLL_WORKQUEUE);
1271*4882a593Smuzhiyun 	if (IS_ERR(queue->cq)) {
1272*4882a593Smuzhiyun 		ret = PTR_ERR(queue->cq);
1273*4882a593Smuzhiyun 		pr_err("failed to create CQ cqe= %d ret= %d\n",
1274*4882a593Smuzhiyun 		       nr_cqe + 1, ret);
1275*4882a593Smuzhiyun 		goto out;
1276*4882a593Smuzhiyun 	}
1277*4882a593Smuzhiyun 
1278*4882a593Smuzhiyun 	memset(&qp_attr, 0, sizeof(qp_attr));
1279*4882a593Smuzhiyun 	qp_attr.qp_context = queue;
1280*4882a593Smuzhiyun 	qp_attr.event_handler = nvmet_rdma_qp_event;
1281*4882a593Smuzhiyun 	qp_attr.send_cq = queue->cq;
1282*4882a593Smuzhiyun 	qp_attr.recv_cq = queue->cq;
1283*4882a593Smuzhiyun 	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
1284*4882a593Smuzhiyun 	qp_attr.qp_type = IB_QPT_RC;
1285*4882a593Smuzhiyun 	/* +1 for drain */
1286*4882a593Smuzhiyun 	qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
1287*4882a593Smuzhiyun 	factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num,
1288*4882a593Smuzhiyun 				   1 << NVMET_RDMA_MAX_MDTS);
1289*4882a593Smuzhiyun 	qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor;
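	/*
	 * Sizing note (illustrative, assuming a 4K device page): 1 <<
	 * NVMET_RDMA_MAX_MDTS is the largest I/O in pages (256 pages, i.e.
	 * 1M), and rdma_rw_mr_factor() estimates how many R/W contexts such a
	 * worst-case transfer may need, so max_rdma_ctxs scales the send
	 * queue depth by that factor.
	 */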
1290*4882a593Smuzhiyun 	qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
1291*4882a593Smuzhiyun 					ndev->device->attrs.max_send_sge);
1292*4882a593Smuzhiyun 
1293*4882a593Smuzhiyun 	if (queue->nsrq) {
1294*4882a593Smuzhiyun 		qp_attr.srq = queue->nsrq->srq;
1295*4882a593Smuzhiyun 	} else {
1296*4882a593Smuzhiyun 		/* +1 for drain */
1297*4882a593Smuzhiyun 		qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
1298*4882a593Smuzhiyun 		qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
1299*4882a593Smuzhiyun 	}
1300*4882a593Smuzhiyun 
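	/*
	 * T10-PI offload is only requested for I/O queues (host_qid != 0);
	 * the admin queue does not carry protected data.
	 */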
1301*4882a593Smuzhiyun 	if (queue->port->pi_enable && queue->host_qid)
1302*4882a593Smuzhiyun 		qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
1303*4882a593Smuzhiyun 
1304*4882a593Smuzhiyun 	ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
1305*4882a593Smuzhiyun 	if (ret) {
1306*4882a593Smuzhiyun 		pr_err("failed to create_qp ret= %d\n", ret);
1307*4882a593Smuzhiyun 		goto err_destroy_cq;
1308*4882a593Smuzhiyun 	}
1309*4882a593Smuzhiyun 	queue->qp = queue->cm_id->qp;
1310*4882a593Smuzhiyun 
1311*4882a593Smuzhiyun 	atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
1312*4882a593Smuzhiyun 
1313*4882a593Smuzhiyun 	pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
1314*4882a593Smuzhiyun 		 __func__, queue->cq->cqe, qp_attr.cap.max_send_sge,
1315*4882a593Smuzhiyun 		 qp_attr.cap.max_send_wr, queue->cm_id);
1316*4882a593Smuzhiyun 
1317*4882a593Smuzhiyun 	if (!queue->nsrq) {
1318*4882a593Smuzhiyun 		for (i = 0; i < queue->recv_queue_size; i++) {
1319*4882a593Smuzhiyun 			queue->cmds[i].queue = queue;
1320*4882a593Smuzhiyun 			ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
1321*4882a593Smuzhiyun 			if (ret)
1322*4882a593Smuzhiyun 				goto err_destroy_qp;
1323*4882a593Smuzhiyun 		}
1324*4882a593Smuzhiyun 	}
1325*4882a593Smuzhiyun 
1326*4882a593Smuzhiyun out:
1327*4882a593Smuzhiyun 	return ret;
1328*4882a593Smuzhiyun 
1329*4882a593Smuzhiyun err_destroy_qp:
1330*4882a593Smuzhiyun 	rdma_destroy_qp(queue->cm_id);
1331*4882a593Smuzhiyun err_destroy_cq:
1332*4882a593Smuzhiyun 	ib_cq_pool_put(queue->cq, nr_cqe + 1);
1333*4882a593Smuzhiyun 	goto out;
1334*4882a593Smuzhiyun }
1335*4882a593Smuzhiyun 
1336*4882a593Smuzhiyun static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
1337*4882a593Smuzhiyun {
1338*4882a593Smuzhiyun 	ib_drain_qp(queue->qp);
1339*4882a593Smuzhiyun 	if (queue->cm_id)
1340*4882a593Smuzhiyun 		rdma_destroy_id(queue->cm_id);
1341*4882a593Smuzhiyun 	ib_destroy_qp(queue->qp);
1342*4882a593Smuzhiyun 	ib_cq_pool_put(queue->cq, queue->recv_queue_size + 2 *
1343*4882a593Smuzhiyun 		       queue->send_queue_size + 1);
1344*4882a593Smuzhiyun }
1345*4882a593Smuzhiyun 
1346*4882a593Smuzhiyun static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
1347*4882a593Smuzhiyun {
1348*4882a593Smuzhiyun 	pr_debug("freeing queue %d\n", queue->idx);
1349*4882a593Smuzhiyun 
1350*4882a593Smuzhiyun 	nvmet_sq_destroy(&queue->nvme_sq);
1351*4882a593Smuzhiyun 
1352*4882a593Smuzhiyun 	nvmet_rdma_destroy_queue_ib(queue);
1353*4882a593Smuzhiyun 	if (!queue->nsrq) {
1354*4882a593Smuzhiyun 		nvmet_rdma_free_cmds(queue->dev, queue->cmds,
1355*4882a593Smuzhiyun 				queue->recv_queue_size,
1356*4882a593Smuzhiyun 				!queue->host_qid);
1357*4882a593Smuzhiyun 	}
1358*4882a593Smuzhiyun 	nvmet_rdma_free_rsps(queue);
1359*4882a593Smuzhiyun 	ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
1360*4882a593Smuzhiyun 	kfree(queue);
1361*4882a593Smuzhiyun }
1362*4882a593Smuzhiyun 
1363*4882a593Smuzhiyun static void nvmet_rdma_release_queue_work(struct work_struct *w)
1364*4882a593Smuzhiyun {
1365*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue =
1366*4882a593Smuzhiyun 		container_of(w, struct nvmet_rdma_queue, release_work);
1367*4882a593Smuzhiyun 	struct nvmet_rdma_device *dev = queue->dev;
1368*4882a593Smuzhiyun 
1369*4882a593Smuzhiyun 	nvmet_rdma_free_queue(queue);
1370*4882a593Smuzhiyun 
1371*4882a593Smuzhiyun 	kref_put(&dev->ref, nvmet_rdma_free_dev);
1372*4882a593Smuzhiyun }
1373*4882a593Smuzhiyun 
1374*4882a593Smuzhiyun static int
1375*4882a593Smuzhiyun nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
1376*4882a593Smuzhiyun 				struct nvmet_rdma_queue *queue)
1377*4882a593Smuzhiyun {
1378*4882a593Smuzhiyun 	struct nvme_rdma_cm_req *req;
1379*4882a593Smuzhiyun 
1380*4882a593Smuzhiyun 	req = (struct nvme_rdma_cm_req *)conn->private_data;
1381*4882a593Smuzhiyun 	if (!req || conn->private_data_len == 0)
1382*4882a593Smuzhiyun 		return NVME_RDMA_CM_INVALID_LEN;
1383*4882a593Smuzhiyun 
1384*4882a593Smuzhiyun 	if (le16_to_cpu(req->recfmt) != NVME_RDMA_CM_FMT_1_0)
1385*4882a593Smuzhiyun 		return NVME_RDMA_CM_INVALID_RECFMT;
1386*4882a593Smuzhiyun 
1387*4882a593Smuzhiyun 	queue->host_qid = le16_to_cpu(req->qid);
1388*4882a593Smuzhiyun 
1389*4882a593Smuzhiyun 	/*
1390*4882a593Smuzhiyun 	 * req->hsqsize corresponds to our recv queue size plus 1
1391*4882a593Smuzhiyun 	 * req->hrqsize corresponds to our send queue size
1392*4882a593Smuzhiyun 	 */
1393*4882a593Smuzhiyun 	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
1394*4882a593Smuzhiyun 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
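	/*
	 * Worked example (hypothetical values): a host that created its queue
	 * with 128 entries sends hsqsize = 127 (0-based) and hrqsize = 128,
	 * which becomes recv_queue_size = 128 and send_queue_size = 128 here.
	 */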
1395*4882a593Smuzhiyun 
1396*4882a593Smuzhiyun 	if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
1397*4882a593Smuzhiyun 		return NVME_RDMA_CM_INVALID_HSQSIZE;
1398*4882a593Smuzhiyun 
1399*4882a593Smuzhiyun 	/* XXX: Should we enforce some kind of max for IO queues? */
1400*4882a593Smuzhiyun 
1401*4882a593Smuzhiyun 	return 0;
1402*4882a593Smuzhiyun }
1403*4882a593Smuzhiyun 
1404*4882a593Smuzhiyun static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id,
1405*4882a593Smuzhiyun 				enum nvme_rdma_cm_status status)
1406*4882a593Smuzhiyun {
1407*4882a593Smuzhiyun 	struct nvme_rdma_cm_rej rej;
1408*4882a593Smuzhiyun 
1409*4882a593Smuzhiyun 	pr_debug("rejecting connect request: status %d (%s)\n",
1410*4882a593Smuzhiyun 		 status, nvme_rdma_cm_msg(status));
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun 	rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
1413*4882a593Smuzhiyun 	rej.sts = cpu_to_le16(status);
1414*4882a593Smuzhiyun 
1415*4882a593Smuzhiyun 	return rdma_reject(cm_id, (void *)&rej, sizeof(rej),
1416*4882a593Smuzhiyun 			   IB_CM_REJ_CONSUMER_DEFINED);
1417*4882a593Smuzhiyun }
1418*4882a593Smuzhiyun 
1419*4882a593Smuzhiyun static struct nvmet_rdma_queue *
1420*4882a593Smuzhiyun nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
1421*4882a593Smuzhiyun 		struct rdma_cm_id *cm_id,
1422*4882a593Smuzhiyun 		struct rdma_cm_event *event)
1423*4882a593Smuzhiyun {
1424*4882a593Smuzhiyun 	struct nvmet_rdma_port *port = cm_id->context;
1425*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue;
1426*4882a593Smuzhiyun 	int ret;
1427*4882a593Smuzhiyun 
1428*4882a593Smuzhiyun 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1429*4882a593Smuzhiyun 	if (!queue) {
1430*4882a593Smuzhiyun 		ret = NVME_RDMA_CM_NO_RSC;
1431*4882a593Smuzhiyun 		goto out_reject;
1432*4882a593Smuzhiyun 	}
1433*4882a593Smuzhiyun 
1434*4882a593Smuzhiyun 	ret = nvmet_sq_init(&queue->nvme_sq);
1435*4882a593Smuzhiyun 	if (ret) {
1436*4882a593Smuzhiyun 		ret = NVME_RDMA_CM_NO_RSC;
1437*4882a593Smuzhiyun 		goto out_free_queue;
1438*4882a593Smuzhiyun 	}
1439*4882a593Smuzhiyun 
1440*4882a593Smuzhiyun 	ret = nvmet_rdma_parse_cm_connect_req(&event->param.conn, queue);
1441*4882a593Smuzhiyun 	if (ret)
1442*4882a593Smuzhiyun 		goto out_destroy_sq;
1443*4882a593Smuzhiyun 
1444*4882a593Smuzhiyun 	/*
1445*4882a593Smuzhiyun 	 * Schedules the actual release because calling rdma_destroy_id from
1446*4882a593Smuzhiyun 	 * inside a CM callback would trigger a deadlock. (great API design..)
1447*4882a593Smuzhiyun 	 */
1448*4882a593Smuzhiyun 	INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
1449*4882a593Smuzhiyun 	queue->dev = ndev;
1450*4882a593Smuzhiyun 	queue->cm_id = cm_id;
1451*4882a593Smuzhiyun 	queue->port = port->nport;
1452*4882a593Smuzhiyun 
1453*4882a593Smuzhiyun 	spin_lock_init(&queue->state_lock);
1454*4882a593Smuzhiyun 	queue->state = NVMET_RDMA_Q_CONNECTING;
1455*4882a593Smuzhiyun 	INIT_LIST_HEAD(&queue->rsp_wait_list);
1456*4882a593Smuzhiyun 	INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
1457*4882a593Smuzhiyun 	spin_lock_init(&queue->rsp_wr_wait_lock);
1458*4882a593Smuzhiyun 	INIT_LIST_HEAD(&queue->free_rsps);
1459*4882a593Smuzhiyun 	spin_lock_init(&queue->rsps_lock);
1460*4882a593Smuzhiyun 	INIT_LIST_HEAD(&queue->queue_list);
1461*4882a593Smuzhiyun 
1462*4882a593Smuzhiyun 	queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
1463*4882a593Smuzhiyun 	if (queue->idx < 0) {
1464*4882a593Smuzhiyun 		ret = NVME_RDMA_CM_NO_RSC;
1465*4882a593Smuzhiyun 		goto out_destroy_sq;
1466*4882a593Smuzhiyun 	}
1467*4882a593Smuzhiyun 
1468*4882a593Smuzhiyun 	/*
1469*4882a593Smuzhiyun 	 * Spread the io queues across completion vectors,
1470*4882a593Smuzhiyun 	 * but still keep all admin queues on vector 0.
1471*4882a593Smuzhiyun 	 */
1472*4882a593Smuzhiyun 	queue->comp_vector = !queue->host_qid ? 0 :
1473*4882a593Smuzhiyun 		queue->idx % ndev->device->num_comp_vectors;
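	/*
	 * Example spread (hypothetical): on a device with 4 completion
	 * vectors, the admin queue (host_qid == 0) stays on vector 0, while an
	 * I/O queue with idx 5 lands on vector 5 % 4 = 1.
	 */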
1474*4882a593Smuzhiyun 
1475*4882a593Smuzhiyun 
1476*4882a593Smuzhiyun 	ret = nvmet_rdma_alloc_rsps(queue);
1477*4882a593Smuzhiyun 	if (ret) {
1478*4882a593Smuzhiyun 		ret = NVME_RDMA_CM_NO_RSC;
1479*4882a593Smuzhiyun 		goto out_ida_remove;
1480*4882a593Smuzhiyun 	}
1481*4882a593Smuzhiyun 
1482*4882a593Smuzhiyun 	if (ndev->srqs) {
1483*4882a593Smuzhiyun 		queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count];
1484*4882a593Smuzhiyun 	} else {
1485*4882a593Smuzhiyun 		queue->cmds = nvmet_rdma_alloc_cmds(ndev,
1486*4882a593Smuzhiyun 				queue->recv_queue_size,
1487*4882a593Smuzhiyun 				!queue->host_qid);
1488*4882a593Smuzhiyun 		if (IS_ERR(queue->cmds)) {
1489*4882a593Smuzhiyun 			ret = NVME_RDMA_CM_NO_RSC;
1490*4882a593Smuzhiyun 			goto out_free_responses;
1491*4882a593Smuzhiyun 		}
1492*4882a593Smuzhiyun 	}
1493*4882a593Smuzhiyun 
1494*4882a593Smuzhiyun 	ret = nvmet_rdma_create_queue_ib(queue);
1495*4882a593Smuzhiyun 	if (ret) {
1496*4882a593Smuzhiyun 		pr_err("%s: creating RDMA queue failed (%d).\n",
1497*4882a593Smuzhiyun 			__func__, ret);
1498*4882a593Smuzhiyun 		ret = NVME_RDMA_CM_NO_RSC;
1499*4882a593Smuzhiyun 		goto out_free_cmds;
1500*4882a593Smuzhiyun 	}
1501*4882a593Smuzhiyun 
1502*4882a593Smuzhiyun 	return queue;
1503*4882a593Smuzhiyun 
1504*4882a593Smuzhiyun out_free_cmds:
1505*4882a593Smuzhiyun 	if (!queue->nsrq) {
1506*4882a593Smuzhiyun 		nvmet_rdma_free_cmds(queue->dev, queue->cmds,
1507*4882a593Smuzhiyun 				queue->recv_queue_size,
1508*4882a593Smuzhiyun 				!queue->host_qid);
1509*4882a593Smuzhiyun 	}
1510*4882a593Smuzhiyun out_free_responses:
1511*4882a593Smuzhiyun 	nvmet_rdma_free_rsps(queue);
1512*4882a593Smuzhiyun out_ida_remove:
1513*4882a593Smuzhiyun 	ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
1514*4882a593Smuzhiyun out_destroy_sq:
1515*4882a593Smuzhiyun 	nvmet_sq_destroy(&queue->nvme_sq);
1516*4882a593Smuzhiyun out_free_queue:
1517*4882a593Smuzhiyun 	kfree(queue);
1518*4882a593Smuzhiyun out_reject:
1519*4882a593Smuzhiyun 	nvmet_rdma_cm_reject(cm_id, ret);
1520*4882a593Smuzhiyun 	return NULL;
1521*4882a593Smuzhiyun }
1522*4882a593Smuzhiyun 
1523*4882a593Smuzhiyun static void nvmet_rdma_qp_event(struct ib_event *event, void *priv)
1524*4882a593Smuzhiyun {
1525*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = priv;
1526*4882a593Smuzhiyun 
1527*4882a593Smuzhiyun 	switch (event->event) {
1528*4882a593Smuzhiyun 	case IB_EVENT_COMM_EST:
1529*4882a593Smuzhiyun 		rdma_notify(queue->cm_id, event->event);
1530*4882a593Smuzhiyun 		break;
1531*4882a593Smuzhiyun 	case IB_EVENT_QP_LAST_WQE_REACHED:
1532*4882a593Smuzhiyun 		pr_debug("received last WQE reached event for queue=0x%p\n",
1533*4882a593Smuzhiyun 			 queue);
1534*4882a593Smuzhiyun 		break;
1535*4882a593Smuzhiyun 	default:
1536*4882a593Smuzhiyun 		pr_err("received IB QP event: %s (%d)\n",
1537*4882a593Smuzhiyun 		       ib_event_msg(event->event), event->event);
1538*4882a593Smuzhiyun 		break;
1539*4882a593Smuzhiyun 	}
1540*4882a593Smuzhiyun }
1541*4882a593Smuzhiyun 
1542*4882a593Smuzhiyun static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
1543*4882a593Smuzhiyun 		struct nvmet_rdma_queue *queue,
1544*4882a593Smuzhiyun 		struct rdma_conn_param *p)
1545*4882a593Smuzhiyun {
1546*4882a593Smuzhiyun 	struct rdma_conn_param  param = { };
1547*4882a593Smuzhiyun 	struct nvme_rdma_cm_rep priv = { };
1548*4882a593Smuzhiyun 	int ret = -ENOMEM;
1549*4882a593Smuzhiyun 
1550*4882a593Smuzhiyun 	param.rnr_retry_count = 7;
1551*4882a593Smuzhiyun 	param.flow_control = 1;
1552*4882a593Smuzhiyun 	param.initiator_depth = min_t(u8, p->initiator_depth,
1553*4882a593Smuzhiyun 		queue->dev->device->attrs.max_qp_init_rd_atom);
1554*4882a593Smuzhiyun 	param.private_data = &priv;
1555*4882a593Smuzhiyun 	param.private_data_len = sizeof(priv);
1556*4882a593Smuzhiyun 	priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
1557*4882a593Smuzhiyun 	priv.crqsize = cpu_to_le16(queue->recv_queue_size);
1558*4882a593Smuzhiyun 
1559*4882a593Smuzhiyun 	ret = rdma_accept(cm_id, &param);
1560*4882a593Smuzhiyun 	if (ret)
1561*4882a593Smuzhiyun 		pr_err("rdma_accept failed (error code = %d)\n", ret);
1562*4882a593Smuzhiyun 
1563*4882a593Smuzhiyun 	return ret;
1564*4882a593Smuzhiyun }
1565*4882a593Smuzhiyun 
1566*4882a593Smuzhiyun static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
1567*4882a593Smuzhiyun 		struct rdma_cm_event *event)
1568*4882a593Smuzhiyun {
1569*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev;
1570*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue;
1571*4882a593Smuzhiyun 	int ret = -EINVAL;
1572*4882a593Smuzhiyun 
1573*4882a593Smuzhiyun 	ndev = nvmet_rdma_find_get_device(cm_id);
1574*4882a593Smuzhiyun 	if (!ndev) {
1575*4882a593Smuzhiyun 		nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
1576*4882a593Smuzhiyun 		return -ECONNREFUSED;
1577*4882a593Smuzhiyun 	}
1578*4882a593Smuzhiyun 
1579*4882a593Smuzhiyun 	queue = nvmet_rdma_alloc_queue(ndev, cm_id, event);
1580*4882a593Smuzhiyun 	if (!queue) {
1581*4882a593Smuzhiyun 		ret = -ENOMEM;
1582*4882a593Smuzhiyun 		goto put_device;
1583*4882a593Smuzhiyun 	}
1584*4882a593Smuzhiyun 
1585*4882a593Smuzhiyun 	if (queue->host_qid == 0) {
1586*4882a593Smuzhiyun 		/* Let inflight controller teardown complete */
1587*4882a593Smuzhiyun 		flush_scheduled_work();
1588*4882a593Smuzhiyun 	}
1589*4882a593Smuzhiyun 
1590*4882a593Smuzhiyun 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
1591*4882a593Smuzhiyun 	if (ret) {
1592*4882a593Smuzhiyun 		/*
1593*4882a593Smuzhiyun 		 * Don't destroy the cm_id in free path, as we implicitly
1594*4882a593Smuzhiyun 		 * destroy the cm_id here with non-zero ret code.
1595*4882a593Smuzhiyun 		 */
1596*4882a593Smuzhiyun 		queue->cm_id = NULL;
1597*4882a593Smuzhiyun 		goto free_queue;
1598*4882a593Smuzhiyun 	}
1599*4882a593Smuzhiyun 
1600*4882a593Smuzhiyun 	mutex_lock(&nvmet_rdma_queue_mutex);
1601*4882a593Smuzhiyun 	list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list);
1602*4882a593Smuzhiyun 	mutex_unlock(&nvmet_rdma_queue_mutex);
1603*4882a593Smuzhiyun 
1604*4882a593Smuzhiyun 	return 0;
1605*4882a593Smuzhiyun 
1606*4882a593Smuzhiyun free_queue:
1607*4882a593Smuzhiyun 	nvmet_rdma_free_queue(queue);
1608*4882a593Smuzhiyun put_device:
1609*4882a593Smuzhiyun 	kref_put(&ndev->ref, nvmet_rdma_free_dev);
1610*4882a593Smuzhiyun 
1611*4882a593Smuzhiyun 	return ret;
1612*4882a593Smuzhiyun }
1613*4882a593Smuzhiyun 
1614*4882a593Smuzhiyun static void nvmet_rdma_queue_established(struct nvmet_rdma_queue *queue)
1615*4882a593Smuzhiyun {
1616*4882a593Smuzhiyun 	unsigned long flags;
1617*4882a593Smuzhiyun 
1618*4882a593Smuzhiyun 	spin_lock_irqsave(&queue->state_lock, flags);
1619*4882a593Smuzhiyun 	if (queue->state != NVMET_RDMA_Q_CONNECTING) {
1620*4882a593Smuzhiyun 		pr_warn("trying to establish a connected queue\n");
1621*4882a593Smuzhiyun 		goto out_unlock;
1622*4882a593Smuzhiyun 	}
1623*4882a593Smuzhiyun 	queue->state = NVMET_RDMA_Q_LIVE;
1624*4882a593Smuzhiyun 
1625*4882a593Smuzhiyun 	while (!list_empty(&queue->rsp_wait_list)) {
1626*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *cmd;
1627*4882a593Smuzhiyun 
1628*4882a593Smuzhiyun 		cmd = list_first_entry(&queue->rsp_wait_list,
1629*4882a593Smuzhiyun 					struct nvmet_rdma_rsp, wait_list);
1630*4882a593Smuzhiyun 		list_del(&cmd->wait_list);
1631*4882a593Smuzhiyun 
1632*4882a593Smuzhiyun 		spin_unlock_irqrestore(&queue->state_lock, flags);
1633*4882a593Smuzhiyun 		nvmet_rdma_handle_command(queue, cmd);
1634*4882a593Smuzhiyun 		spin_lock_irqsave(&queue->state_lock, flags);
1635*4882a593Smuzhiyun 	}
1636*4882a593Smuzhiyun 
1637*4882a593Smuzhiyun out_unlock:
1638*4882a593Smuzhiyun 	spin_unlock_irqrestore(&queue->state_lock, flags);
1639*4882a593Smuzhiyun }
1640*4882a593Smuzhiyun 
1641*4882a593Smuzhiyun static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
1642*4882a593Smuzhiyun {
1643*4882a593Smuzhiyun 	bool disconnect = false;
1644*4882a593Smuzhiyun 	unsigned long flags;
1645*4882a593Smuzhiyun 
1646*4882a593Smuzhiyun 	pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state);
1647*4882a593Smuzhiyun 
1648*4882a593Smuzhiyun 	spin_lock_irqsave(&queue->state_lock, flags);
1649*4882a593Smuzhiyun 	switch (queue->state) {
1650*4882a593Smuzhiyun 	case NVMET_RDMA_Q_CONNECTING:
1651*4882a593Smuzhiyun 		while (!list_empty(&queue->rsp_wait_list)) {
1652*4882a593Smuzhiyun 			struct nvmet_rdma_rsp *rsp;
1653*4882a593Smuzhiyun 
1654*4882a593Smuzhiyun 			rsp = list_first_entry(&queue->rsp_wait_list,
1655*4882a593Smuzhiyun 					       struct nvmet_rdma_rsp,
1656*4882a593Smuzhiyun 					       wait_list);
1657*4882a593Smuzhiyun 			list_del(&rsp->wait_list);
1658*4882a593Smuzhiyun 			nvmet_rdma_put_rsp(rsp);
1659*4882a593Smuzhiyun 		}
1660*4882a593Smuzhiyun 		fallthrough;
1661*4882a593Smuzhiyun 	case NVMET_RDMA_Q_LIVE:
1662*4882a593Smuzhiyun 		queue->state = NVMET_RDMA_Q_DISCONNECTING;
1663*4882a593Smuzhiyun 		disconnect = true;
1664*4882a593Smuzhiyun 		break;
1665*4882a593Smuzhiyun 	case NVMET_RDMA_Q_DISCONNECTING:
1666*4882a593Smuzhiyun 		break;
1667*4882a593Smuzhiyun 	}
1668*4882a593Smuzhiyun 	spin_unlock_irqrestore(&queue->state_lock, flags);
1669*4882a593Smuzhiyun 
1670*4882a593Smuzhiyun 	if (disconnect) {
1671*4882a593Smuzhiyun 		rdma_disconnect(queue->cm_id);
1672*4882a593Smuzhiyun 		schedule_work(&queue->release_work);
1673*4882a593Smuzhiyun 	}
1674*4882a593Smuzhiyun }
1675*4882a593Smuzhiyun 
1676*4882a593Smuzhiyun static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
1677*4882a593Smuzhiyun {
1678*4882a593Smuzhiyun 	bool disconnect = false;
1679*4882a593Smuzhiyun 
1680*4882a593Smuzhiyun 	mutex_lock(&nvmet_rdma_queue_mutex);
1681*4882a593Smuzhiyun 	if (!list_empty(&queue->queue_list)) {
1682*4882a593Smuzhiyun 		list_del_init(&queue->queue_list);
1683*4882a593Smuzhiyun 		disconnect = true;
1684*4882a593Smuzhiyun 	}
1685*4882a593Smuzhiyun 	mutex_unlock(&nvmet_rdma_queue_mutex);
1686*4882a593Smuzhiyun 
1687*4882a593Smuzhiyun 	if (disconnect)
1688*4882a593Smuzhiyun 		__nvmet_rdma_queue_disconnect(queue);
1689*4882a593Smuzhiyun }
1690*4882a593Smuzhiyun 
1691*4882a593Smuzhiyun static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
1692*4882a593Smuzhiyun 		struct nvmet_rdma_queue *queue)
1693*4882a593Smuzhiyun {
1694*4882a593Smuzhiyun 	WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING);
1695*4882a593Smuzhiyun 
1696*4882a593Smuzhiyun 	mutex_lock(&nvmet_rdma_queue_mutex);
1697*4882a593Smuzhiyun 	if (!list_empty(&queue->queue_list))
1698*4882a593Smuzhiyun 		list_del_init(&queue->queue_list);
1699*4882a593Smuzhiyun 	mutex_unlock(&nvmet_rdma_queue_mutex);
1700*4882a593Smuzhiyun 
1701*4882a593Smuzhiyun 	pr_err("failed to connect queue %d\n", queue->idx);
1702*4882a593Smuzhiyun 	schedule_work(&queue->release_work);
1703*4882a593Smuzhiyun }
1704*4882a593Smuzhiyun 
1705*4882a593Smuzhiyun /**
1706*4882a593Smuzhiyun  * nvme_rdma_device_removal() - Handle RDMA device removal
1707*4882a593Smuzhiyun  * @cm_id:	rdma_cm id, used for nvmet port
1708*4882a593Smuzhiyun  * @queue:      nvmet rdma queue (cm id qp_context)
1709*4882a593Smuzhiyun  *
1710*4882a593Smuzhiyun  * DEVICE_REMOVAL event notifies us that the RDMA device is about
1711*4882a593Smuzhiyun  * to unplug. Note that this event can be generated on a normal
1712*4882a593Smuzhiyun  * queue cm_id and/or a device bound listener cm_id (in which case
1713*4882a593Smuzhiyun  * queue will be NULL).
1714*4882a593Smuzhiyun  *
1715*4882a593Smuzhiyun  * We registered an ib_client to handle device removal for queues,
1716*4882a593Smuzhiyun  * so we only need to handle the listening port cm_ids. In this case
1717*4882a593Smuzhiyun  * we nullify the priv to prevent a double cm_id destruction, and destroy
1718*4882a593Smuzhiyun  * the cm_id implicitly by returning a non-zero rc to the callout.
1719*4882a593Smuzhiyun  */
1720*4882a593Smuzhiyun static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
1721*4882a593Smuzhiyun 		struct nvmet_rdma_queue *queue)
1722*4882a593Smuzhiyun {
1723*4882a593Smuzhiyun 	struct nvmet_rdma_port *port;
1724*4882a593Smuzhiyun 
1725*4882a593Smuzhiyun 	if (queue) {
1726*4882a593Smuzhiyun 		/*
1727*4882a593Smuzhiyun 		 * This is a queue cm_id. We have registered
1728*4882a593Smuzhiyun 		 * an ib_client to handle queue removal,
1729*4882a593Smuzhiyun 		 * so don't interfere and just return.
1730*4882a593Smuzhiyun 		 */
1731*4882a593Smuzhiyun 		return 0;
1732*4882a593Smuzhiyun 	}
1733*4882a593Smuzhiyun 
1734*4882a593Smuzhiyun 	port = cm_id->context;
1735*4882a593Smuzhiyun 
1736*4882a593Smuzhiyun 	/*
1737*4882a593Smuzhiyun 	 * This is a listener cm_id. Make sure that
1738*4882a593Smuzhiyun 	 * future remove_port won't invoke a double
1739*4882a593Smuzhiyun 	 * cm_id destroy. Use atomic xchg to make sure
1740*4882a593Smuzhiyun 	 * we don't compete with remove_port.
1741*4882a593Smuzhiyun 	 */
1742*4882a593Smuzhiyun 	if (xchg(&port->cm_id, NULL) != cm_id)
1743*4882a593Smuzhiyun 		return 0;
1744*4882a593Smuzhiyun 
1745*4882a593Smuzhiyun 	/*
1746*4882a593Smuzhiyun 	 * We need to return 1 so that the core will destroy
1747*4882a593Smuzhiyun 	 * its own ID.  What a great API design..
1748*4882a593Smuzhiyun 	 */
1749*4882a593Smuzhiyun 	return 1;
1750*4882a593Smuzhiyun }
1751*4882a593Smuzhiyun 
1752*4882a593Smuzhiyun static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
1753*4882a593Smuzhiyun 		struct rdma_cm_event *event)
1754*4882a593Smuzhiyun {
1755*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue = NULL;
1756*4882a593Smuzhiyun 	int ret = 0;
1757*4882a593Smuzhiyun 
1758*4882a593Smuzhiyun 	if (cm_id->qp)
1759*4882a593Smuzhiyun 		queue = cm_id->qp->qp_context;
1760*4882a593Smuzhiyun 
1761*4882a593Smuzhiyun 	pr_debug("%s (%d): status %d id %p\n",
1762*4882a593Smuzhiyun 		rdma_event_msg(event->event), event->event,
1763*4882a593Smuzhiyun 		event->status, cm_id);
1764*4882a593Smuzhiyun 
1765*4882a593Smuzhiyun 	switch (event->event) {
1766*4882a593Smuzhiyun 	case RDMA_CM_EVENT_CONNECT_REQUEST:
1767*4882a593Smuzhiyun 		ret = nvmet_rdma_queue_connect(cm_id, event);
1768*4882a593Smuzhiyun 		break;
1769*4882a593Smuzhiyun 	case RDMA_CM_EVENT_ESTABLISHED:
1770*4882a593Smuzhiyun 		nvmet_rdma_queue_established(queue);
1771*4882a593Smuzhiyun 		break;
1772*4882a593Smuzhiyun 	case RDMA_CM_EVENT_ADDR_CHANGE:
1773*4882a593Smuzhiyun 		if (!queue) {
1774*4882a593Smuzhiyun 			struct nvmet_rdma_port *port = cm_id->context;
1775*4882a593Smuzhiyun 
1776*4882a593Smuzhiyun 			schedule_delayed_work(&port->repair_work, 0);
1777*4882a593Smuzhiyun 			break;
1778*4882a593Smuzhiyun 		}
1779*4882a593Smuzhiyun 		fallthrough;
1780*4882a593Smuzhiyun 	case RDMA_CM_EVENT_DISCONNECTED:
1781*4882a593Smuzhiyun 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1782*4882a593Smuzhiyun 		nvmet_rdma_queue_disconnect(queue);
1783*4882a593Smuzhiyun 		break;
1784*4882a593Smuzhiyun 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
1785*4882a593Smuzhiyun 		ret = nvmet_rdma_device_removal(cm_id, queue);
1786*4882a593Smuzhiyun 		break;
1787*4882a593Smuzhiyun 	case RDMA_CM_EVENT_REJECTED:
1788*4882a593Smuzhiyun 		pr_debug("Connection rejected: %s\n",
1789*4882a593Smuzhiyun 			 rdma_reject_msg(cm_id, event->status));
1790*4882a593Smuzhiyun 		fallthrough;
1791*4882a593Smuzhiyun 	case RDMA_CM_EVENT_UNREACHABLE:
1792*4882a593Smuzhiyun 	case RDMA_CM_EVENT_CONNECT_ERROR:
1793*4882a593Smuzhiyun 		nvmet_rdma_queue_connect_fail(cm_id, queue);
1794*4882a593Smuzhiyun 		break;
1795*4882a593Smuzhiyun 	default:
1796*4882a593Smuzhiyun 		pr_err("received unrecognized RDMA CM event %d\n",
1797*4882a593Smuzhiyun 			event->event);
1798*4882a593Smuzhiyun 		break;
1799*4882a593Smuzhiyun 	}
1800*4882a593Smuzhiyun 
1801*4882a593Smuzhiyun 	return ret;
1802*4882a593Smuzhiyun }
1803*4882a593Smuzhiyun 
1804*4882a593Smuzhiyun static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl)
1805*4882a593Smuzhiyun {
1806*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue;
1807*4882a593Smuzhiyun 
1808*4882a593Smuzhiyun restart:
1809*4882a593Smuzhiyun 	mutex_lock(&nvmet_rdma_queue_mutex);
1810*4882a593Smuzhiyun 	list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
1811*4882a593Smuzhiyun 		if (queue->nvme_sq.ctrl == ctrl) {
1812*4882a593Smuzhiyun 			list_del_init(&queue->queue_list);
1813*4882a593Smuzhiyun 			mutex_unlock(&nvmet_rdma_queue_mutex);
1814*4882a593Smuzhiyun 
1815*4882a593Smuzhiyun 			__nvmet_rdma_queue_disconnect(queue);
1816*4882a593Smuzhiyun 			goto restart;
1817*4882a593Smuzhiyun 		}
1818*4882a593Smuzhiyun 	}
1819*4882a593Smuzhiyun 	mutex_unlock(&nvmet_rdma_queue_mutex);
1820*4882a593Smuzhiyun }
1821*4882a593Smuzhiyun 
1822*4882a593Smuzhiyun static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port)
1823*4882a593Smuzhiyun {
1824*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue, *tmp;
1825*4882a593Smuzhiyun 	struct nvmet_port *nport = port->nport;
1826*4882a593Smuzhiyun 
1827*4882a593Smuzhiyun 	mutex_lock(&nvmet_rdma_queue_mutex);
1828*4882a593Smuzhiyun 	list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
1829*4882a593Smuzhiyun 				 queue_list) {
1830*4882a593Smuzhiyun 		if (queue->port != nport)
1831*4882a593Smuzhiyun 			continue;
1832*4882a593Smuzhiyun 
1833*4882a593Smuzhiyun 		list_del_init(&queue->queue_list);
1834*4882a593Smuzhiyun 		__nvmet_rdma_queue_disconnect(queue);
1835*4882a593Smuzhiyun 	}
1836*4882a593Smuzhiyun 	mutex_unlock(&nvmet_rdma_queue_mutex);
1837*4882a593Smuzhiyun }
1838*4882a593Smuzhiyun 
1839*4882a593Smuzhiyun static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
1840*4882a593Smuzhiyun {
1841*4882a593Smuzhiyun 	struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);
1842*4882a593Smuzhiyun 
1843*4882a593Smuzhiyun 	if (cm_id)
1844*4882a593Smuzhiyun 		rdma_destroy_id(cm_id);
1845*4882a593Smuzhiyun 
1846*4882a593Smuzhiyun 	/*
1847*4882a593Smuzhiyun 	 * Destroy the remaining queues, which do not belong to any
1848*4882a593Smuzhiyun 	 * controller yet. Doing it here, after the RDMA-CM ID was
1849*4882a593Smuzhiyun 	 * destroyed, guarantees that no new queue will be created.
1850*4882a593Smuzhiyun 	 */
1851*4882a593Smuzhiyun 	nvmet_rdma_destroy_port_queues(port);
1852*4882a593Smuzhiyun }
1853*4882a593Smuzhiyun 
1854*4882a593Smuzhiyun static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
1855*4882a593Smuzhiyun {
1856*4882a593Smuzhiyun 	struct sockaddr *addr = (struct sockaddr *)&port->addr;
1857*4882a593Smuzhiyun 	struct rdma_cm_id *cm_id;
1858*4882a593Smuzhiyun 	int ret;
1859*4882a593Smuzhiyun 
1860*4882a593Smuzhiyun 	cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
1861*4882a593Smuzhiyun 			RDMA_PS_TCP, IB_QPT_RC);
1862*4882a593Smuzhiyun 	if (IS_ERR(cm_id)) {
1863*4882a593Smuzhiyun 		pr_err("CM ID creation failed\n");
1864*4882a593Smuzhiyun 		return PTR_ERR(cm_id);
1865*4882a593Smuzhiyun 	}
1866*4882a593Smuzhiyun 
1867*4882a593Smuzhiyun 	/*
1868*4882a593Smuzhiyun 	 * Allow both IPv4 and IPv6 sockets to bind a single port
1869*4882a593Smuzhiyun 	 * at the same time.
1870*4882a593Smuzhiyun 	 */
1871*4882a593Smuzhiyun 	ret = rdma_set_afonly(cm_id, 1);
1872*4882a593Smuzhiyun 	if (ret) {
1873*4882a593Smuzhiyun 		pr_err("rdma_set_afonly failed (%d)\n", ret);
1874*4882a593Smuzhiyun 		goto out_destroy_id;
1875*4882a593Smuzhiyun 	}
1876*4882a593Smuzhiyun 
1877*4882a593Smuzhiyun 	ret = rdma_bind_addr(cm_id, addr);
1878*4882a593Smuzhiyun 	if (ret) {
1879*4882a593Smuzhiyun 		pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret);
1880*4882a593Smuzhiyun 		goto out_destroy_id;
1881*4882a593Smuzhiyun 	}
1882*4882a593Smuzhiyun 
1883*4882a593Smuzhiyun 	ret = rdma_listen(cm_id, 128);
1884*4882a593Smuzhiyun 	if (ret) {
1885*4882a593Smuzhiyun 		pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
1886*4882a593Smuzhiyun 		goto out_destroy_id;
1887*4882a593Smuzhiyun 	}
1888*4882a593Smuzhiyun 
1889*4882a593Smuzhiyun 	port->cm_id = cm_id;
1890*4882a593Smuzhiyun 	return 0;
1891*4882a593Smuzhiyun 
1892*4882a593Smuzhiyun out_destroy_id:
1893*4882a593Smuzhiyun 	rdma_destroy_id(cm_id);
1894*4882a593Smuzhiyun 	return ret;
1895*4882a593Smuzhiyun }
1896*4882a593Smuzhiyun 
1897*4882a593Smuzhiyun static void nvmet_rdma_repair_port_work(struct work_struct *w)
1898*4882a593Smuzhiyun {
1899*4882a593Smuzhiyun 	struct nvmet_rdma_port *port = container_of(to_delayed_work(w),
1900*4882a593Smuzhiyun 			struct nvmet_rdma_port, repair_work);
1901*4882a593Smuzhiyun 	int ret;
1902*4882a593Smuzhiyun 
1903*4882a593Smuzhiyun 	nvmet_rdma_disable_port(port);
1904*4882a593Smuzhiyun 	ret = nvmet_rdma_enable_port(port);
1905*4882a593Smuzhiyun 	if (ret)
1906*4882a593Smuzhiyun 		schedule_delayed_work(&port->repair_work, 5 * HZ);
1907*4882a593Smuzhiyun }
1908*4882a593Smuzhiyun 
1909*4882a593Smuzhiyun static int nvmet_rdma_add_port(struct nvmet_port *nport)
1910*4882a593Smuzhiyun {
1911*4882a593Smuzhiyun 	struct nvmet_rdma_port *port;
1912*4882a593Smuzhiyun 	__kernel_sa_family_t af;
1913*4882a593Smuzhiyun 	int ret;
1914*4882a593Smuzhiyun 
1915*4882a593Smuzhiyun 	port = kzalloc(sizeof(*port), GFP_KERNEL);
1916*4882a593Smuzhiyun 	if (!port)
1917*4882a593Smuzhiyun 		return -ENOMEM;
1918*4882a593Smuzhiyun 
1919*4882a593Smuzhiyun 	nport->priv = port;
1920*4882a593Smuzhiyun 	port->nport = nport;
1921*4882a593Smuzhiyun 	INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work);
1922*4882a593Smuzhiyun 
1923*4882a593Smuzhiyun 	switch (nport->disc_addr.adrfam) {
1924*4882a593Smuzhiyun 	case NVMF_ADDR_FAMILY_IP4:
1925*4882a593Smuzhiyun 		af = AF_INET;
1926*4882a593Smuzhiyun 		break;
1927*4882a593Smuzhiyun 	case NVMF_ADDR_FAMILY_IP6:
1928*4882a593Smuzhiyun 		af = AF_INET6;
1929*4882a593Smuzhiyun 		break;
1930*4882a593Smuzhiyun 	default:
1931*4882a593Smuzhiyun 		pr_err("address family %d not supported\n",
1932*4882a593Smuzhiyun 			nport->disc_addr.adrfam);
1933*4882a593Smuzhiyun 		ret = -EINVAL;
1934*4882a593Smuzhiyun 		goto out_free_port;
1935*4882a593Smuzhiyun 	}
1936*4882a593Smuzhiyun 
1937*4882a593Smuzhiyun 	if (nport->inline_data_size < 0) {
1938*4882a593Smuzhiyun 		nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
1939*4882a593Smuzhiyun 	} else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
1940*4882a593Smuzhiyun 		pr_warn("inline_data_size %u is too large, reducing to %u\n",
1941*4882a593Smuzhiyun 			nport->inline_data_size,
1942*4882a593Smuzhiyun 			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
1943*4882a593Smuzhiyun 		nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
1944*4882a593Smuzhiyun 	}
1945*4882a593Smuzhiyun 
1946*4882a593Smuzhiyun 	ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
1947*4882a593Smuzhiyun 			nport->disc_addr.trsvcid, &port->addr);
1948*4882a593Smuzhiyun 	if (ret) {
1949*4882a593Smuzhiyun 		pr_err("malformed ip/port passed: %s:%s\n",
1950*4882a593Smuzhiyun 			nport->disc_addr.traddr, nport->disc_addr.trsvcid);
1951*4882a593Smuzhiyun 		goto out_free_port;
1952*4882a593Smuzhiyun 	}
1953*4882a593Smuzhiyun 
1954*4882a593Smuzhiyun 	ret = nvmet_rdma_enable_port(port);
1955*4882a593Smuzhiyun 	if (ret)
1956*4882a593Smuzhiyun 		goto out_free_port;
1957*4882a593Smuzhiyun 
1958*4882a593Smuzhiyun 	pr_info("enabling port %d (%pISpcs)\n",
1959*4882a593Smuzhiyun 		le16_to_cpu(nport->disc_addr.portid),
1960*4882a593Smuzhiyun 		(struct sockaddr *)&port->addr);
1961*4882a593Smuzhiyun 
1962*4882a593Smuzhiyun 	return 0;
1963*4882a593Smuzhiyun 
1964*4882a593Smuzhiyun out_free_port:
1965*4882a593Smuzhiyun 	kfree(port);
1966*4882a593Smuzhiyun 	return ret;
1967*4882a593Smuzhiyun }
1968*4882a593Smuzhiyun 
1969*4882a593Smuzhiyun static void nvmet_rdma_remove_port(struct nvmet_port *nport)
1970*4882a593Smuzhiyun {
1971*4882a593Smuzhiyun 	struct nvmet_rdma_port *port = nport->priv;
1972*4882a593Smuzhiyun 
1973*4882a593Smuzhiyun 	cancel_delayed_work_sync(&port->repair_work);
1974*4882a593Smuzhiyun 	nvmet_rdma_disable_port(port);
1975*4882a593Smuzhiyun 	kfree(port);
1976*4882a593Smuzhiyun }
1977*4882a593Smuzhiyun 
1978*4882a593Smuzhiyun static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
1979*4882a593Smuzhiyun 		struct nvmet_port *nport, char *traddr)
1980*4882a593Smuzhiyun {
1981*4882a593Smuzhiyun 	struct nvmet_rdma_port *port = nport->priv;
1982*4882a593Smuzhiyun 	struct rdma_cm_id *cm_id = port->cm_id;
1983*4882a593Smuzhiyun 
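	/*
	 * If the port listens on a wildcard address (e.g. 0.0.0.0 or ::), the
	 * discovery log reports the local address of this particular
	 * connection instead of the configured traddr.
	 */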
1984*4882a593Smuzhiyun 	if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
1985*4882a593Smuzhiyun 		struct nvmet_rdma_rsp *rsp =
1986*4882a593Smuzhiyun 			container_of(req, struct nvmet_rdma_rsp, req);
1987*4882a593Smuzhiyun 		struct rdma_cm_id *req_cm_id = rsp->queue->cm_id;
1988*4882a593Smuzhiyun 		struct sockaddr *addr = (void *)&req_cm_id->route.addr.src_addr;
1989*4882a593Smuzhiyun 
1990*4882a593Smuzhiyun 		sprintf(traddr, "%pISc", addr);
1991*4882a593Smuzhiyun 	} else {
1992*4882a593Smuzhiyun 		memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
1993*4882a593Smuzhiyun 	}
1994*4882a593Smuzhiyun }
1995*4882a593Smuzhiyun 
1996*4882a593Smuzhiyun static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
1997*4882a593Smuzhiyun {
1998*4882a593Smuzhiyun 	if (ctrl->pi_support)
1999*4882a593Smuzhiyun 		return NVMET_RDMA_MAX_METADATA_MDTS;
2000*4882a593Smuzhiyun 	return NVMET_RDMA_MAX_MDTS;
2001*4882a593Smuzhiyun }
2002*4882a593Smuzhiyun 
2003*4882a593Smuzhiyun static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
2004*4882a593Smuzhiyun 	.owner			= THIS_MODULE,
2005*4882a593Smuzhiyun 	.type			= NVMF_TRTYPE_RDMA,
2006*4882a593Smuzhiyun 	.msdbd			= 1,
2007*4882a593Smuzhiyun 	.flags			= NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED,
2008*4882a593Smuzhiyun 	.add_port		= nvmet_rdma_add_port,
2009*4882a593Smuzhiyun 	.remove_port		= nvmet_rdma_remove_port,
2010*4882a593Smuzhiyun 	.queue_response		= nvmet_rdma_queue_response,
2011*4882a593Smuzhiyun 	.delete_ctrl		= nvmet_rdma_delete_ctrl,
2012*4882a593Smuzhiyun 	.disc_traddr		= nvmet_rdma_disc_port_addr,
2013*4882a593Smuzhiyun 	.get_mdts		= nvmet_rdma_get_mdts,
2014*4882a593Smuzhiyun };
2015*4882a593Smuzhiyun 
2016*4882a593Smuzhiyun static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
2017*4882a593Smuzhiyun {
2018*4882a593Smuzhiyun 	struct nvmet_rdma_queue *queue, *tmp;
2019*4882a593Smuzhiyun 	struct nvmet_rdma_device *ndev;
2020*4882a593Smuzhiyun 	bool found = false;
2021*4882a593Smuzhiyun 
2022*4882a593Smuzhiyun 	mutex_lock(&device_list_mutex);
2023*4882a593Smuzhiyun 	list_for_each_entry(ndev, &device_list, entry) {
2024*4882a593Smuzhiyun 		if (ndev->device == ib_device) {
2025*4882a593Smuzhiyun 			found = true;
2026*4882a593Smuzhiyun 			break;
2027*4882a593Smuzhiyun 		}
2028*4882a593Smuzhiyun 	}
2029*4882a593Smuzhiyun 	mutex_unlock(&device_list_mutex);
2030*4882a593Smuzhiyun 
2031*4882a593Smuzhiyun 	if (!found)
2032*4882a593Smuzhiyun 		return;
2033*4882a593Smuzhiyun 
2034*4882a593Smuzhiyun 	/*
2035*4882a593Smuzhiyun 	 * IB Device that is used by nvmet controllers is being removed,
2036*4882a593Smuzhiyun 	 * delete all queues using this device.
2037*4882a593Smuzhiyun 	 */
2038*4882a593Smuzhiyun 	mutex_lock(&nvmet_rdma_queue_mutex);
2039*4882a593Smuzhiyun 	list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
2040*4882a593Smuzhiyun 				 queue_list) {
2041*4882a593Smuzhiyun 		if (queue->dev->device != ib_device)
2042*4882a593Smuzhiyun 			continue;
2043*4882a593Smuzhiyun 
2044*4882a593Smuzhiyun 		pr_info("Removing queue %d\n", queue->idx);
2045*4882a593Smuzhiyun 		list_del_init(&queue->queue_list);
2046*4882a593Smuzhiyun 		__nvmet_rdma_queue_disconnect(queue);
2047*4882a593Smuzhiyun 	}
2048*4882a593Smuzhiyun 	mutex_unlock(&nvmet_rdma_queue_mutex);
2049*4882a593Smuzhiyun 
2050*4882a593Smuzhiyun 	flush_scheduled_work();
2051*4882a593Smuzhiyun }
2052*4882a593Smuzhiyun 
2053*4882a593Smuzhiyun static struct ib_client nvmet_rdma_ib_client = {
2054*4882a593Smuzhiyun 	.name   = "nvmet_rdma",
2055*4882a593Smuzhiyun 	.remove = nvmet_rdma_remove_one
2056*4882a593Smuzhiyun };
2057*4882a593Smuzhiyun 
2058*4882a593Smuzhiyun static int __init nvmet_rdma_init(void)
2059*4882a593Smuzhiyun {
2060*4882a593Smuzhiyun 	int ret;
2061*4882a593Smuzhiyun 
2062*4882a593Smuzhiyun 	ret = ib_register_client(&nvmet_rdma_ib_client);
2063*4882a593Smuzhiyun 	if (ret)
2064*4882a593Smuzhiyun 		return ret;
2065*4882a593Smuzhiyun 
2066*4882a593Smuzhiyun 	ret = nvmet_register_transport(&nvmet_rdma_ops);
2067*4882a593Smuzhiyun 	if (ret)
2068*4882a593Smuzhiyun 		goto err_ib_client;
2069*4882a593Smuzhiyun 
2070*4882a593Smuzhiyun 	return 0;
2071*4882a593Smuzhiyun 
2072*4882a593Smuzhiyun err_ib_client:
2073*4882a593Smuzhiyun 	ib_unregister_client(&nvmet_rdma_ib_client);
2074*4882a593Smuzhiyun 	return ret;
2075*4882a593Smuzhiyun }
2076*4882a593Smuzhiyun 
2077*4882a593Smuzhiyun static void __exit nvmet_rdma_exit(void)
2078*4882a593Smuzhiyun {
2079*4882a593Smuzhiyun 	nvmet_unregister_transport(&nvmet_rdma_ops);
2080*4882a593Smuzhiyun 	ib_unregister_client(&nvmet_rdma_ib_client);
2081*4882a593Smuzhiyun 	WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list));
2082*4882a593Smuzhiyun 	ida_destroy(&nvmet_rdma_queue_ida);
2083*4882a593Smuzhiyun }
2084*4882a593Smuzhiyun 
2085*4882a593Smuzhiyun module_init(nvmet_rdma_init);
2086*4882a593Smuzhiyun module_exit(nvmet_rdma_exit);
2087*4882a593Smuzhiyun 
2088*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
2089*4882a593Smuzhiyun MODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */
2090