// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 HGST, a Western Digital Company.
 */
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>

enum {
	RDMA_RW_SINGLE_WR,
	RDMA_RW_MULTI_WR,
	RDMA_RW_MR,
	RDMA_RW_SIG_MR,
};

static bool rdma_rw_force_mr;
module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");

/*
 * Report whether memory registration should be used. Memory registration must
 * be used for iWarp devices because of iWARP-specific limitations. Memory
 * registration is also enabled if registering memory might yield better
 * performance than using multiple SGE entries, see rdma_rw_io_needs_mr().
 */
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
{
	if (rdma_protocol_iwarp(dev, port_num))
		return true;
	if (dev->attrs.max_sgl_rd)
		return true;
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

/*
 * Check if the device will use memory registration for this RW operation.
 * For RDMA READs we must use MRs on iWarp and can optionally use them as an
 * optimization otherwise. Additionally we have a debug option to force usage
 * of MRs to help testing this code path.
 */
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
		enum dma_data_direction dir, int dma_nents)
{
	if (dir == DMA_FROM_DEVICE) {
		if (rdma_protocol_iwarp(dev, port_num))
			return true;
		if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
			return true;
	}
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

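/*
 * Maximum number of pages a single fast-registration MR may cover on this
 * device, taking protection information (PI) support into account and capped
 * to keep per-MR resource allocations bounded.
 */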
static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
					   bool pi_support)
{
	u32 max_pages;

	if (pi_support)
		max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
	else
		max_pages = dev->attrs.max_fast_reg_page_list_len;

	/* arbitrary limit to avoid allocating gigantic resources */
	return min_t(u32, max_pages, 256);
}

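/*
 * If the MR still carries a remotely visible key that must be invalidated,
 * prepend a LOCAL_INV WR in front of the registration WR. Returns the number
 * of WRs added to the chain (0 or 1).
 */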
static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
{
	int count = 0;

	if (reg->mr->need_inval) {
		reg->inv_wr.opcode = IB_WR_LOCAL_INV;
		reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
		reg->inv_wr.next = &reg->reg_wr.wr;
		count++;
	} else {
		reg->inv_wr.next = NULL;
	}

	return count;
}

/* Caller must have zero-initialized *reg. */
static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
		struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
		u32 sg_cnt, u32 offset)
{
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	u32 nents = min(sg_cnt, pages_per_mr);
	int count = 0, ret;

	reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
	if (!reg->mr)
		return -EAGAIN;

	count += rdma_rw_inv_key(reg);

	ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
	if (ret < 0 || ret < nents) {
		ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
		return -EINVAL;
	}

	reg->reg_wr.wr.opcode = IB_WR_REG_MR;
	reg->reg_wr.mr = reg->mr;
	reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
	if (rdma_protocol_iwarp(qp->device, port_num))
		reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
	count++;

	reg->sge.addr = reg->mr->iova;
	reg->sge.length = reg->mr->length;
	return count;
}

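/*
 * Build the WR chain for an MR-based transfer: split the scatterlist into
 * pages_per_mr sized chunks, register one MR per chunk and chain the
 * (optional LOCAL_INV) -> REG_MR -> RDMA READ/WRITE WRs together. Returns
 * the total number of WRs in the chain or a negative errno.
 */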
static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct rdma_rw_reg_ctx *prev = NULL;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	int i, j, ret = 0, count = 0;

	ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
	ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg) {
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < ctx->nr_ops; i++) {
		struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
		u32 nents = min(sg_cnt, pages_per_mr);

		ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
				offset);
		if (ret < 0)
			goto out_free;
		count += ret;

		if (prev) {
			if (reg->mr->need_inval)
				prev->wr.wr.next = &reg->inv_wr;
			else
				prev->wr.wr.next = &reg->reg_wr.wr;
		}

		reg->reg_wr.wr.next = &reg->wr.wr;

		reg->wr.wr.sg_list = &reg->sge;
		reg->wr.wr.num_sge = 1;
		reg->wr.remote_addr = remote_addr;
		reg->wr.rkey = rkey;
		if (dir == DMA_TO_DEVICE) {
			reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
		} else if (!rdma_cap_read_inv(qp->device, port_num)) {
			reg->wr.wr.opcode = IB_WR_RDMA_READ;
		} else {
			reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
			reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
		}
		count++;

		remote_addr += reg->sge.length;
		sg_cnt -= nents;
		for (j = 0; j < nents; j++)
			sg = sg_next(sg);
		prev = reg;
		offset = 0;
	}

	if (prev)
		prev->wr.wr.next = NULL;

	ctx->type = RDMA_RW_MR;
	return count;

out_free:
	while (--i >= 0)
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
	kfree(ctx->reg);
out:
	return ret;
}

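/*
 * Build plain RDMA READ/WRITE WRs that reference the DMA-mapped scatterlist
 * directly through the local DMA lkey, splitting it into chunks of at most
 * max_read_sge/max_write_sge entries per WR. Returns the number of WRs or
 * -ENOMEM.
 */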
static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
		      qp->max_read_sge;
	struct ib_sge *sge;
	u32 total_len = 0, i, j;

	ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);

	ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
	if (!ctx->map.sges)
		goto out;

	ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
	if (!ctx->map.wrs)
		goto out_free_sges;

	for (i = 0; i < ctx->nr_ops; i++) {
		struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
		u32 nr_sge = min(sg_cnt, max_sge);

		if (dir == DMA_TO_DEVICE)
			rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
		else
			rdma_wr->wr.opcode = IB_WR_RDMA_READ;
		rdma_wr->remote_addr = remote_addr + total_len;
		rdma_wr->rkey = rkey;
		rdma_wr->wr.num_sge = nr_sge;
		rdma_wr->wr.sg_list = sge;

		for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
			sge->addr = sg_dma_address(sg) + offset;
			sge->length = sg_dma_len(sg) - offset;
			sge->lkey = qp->pd->local_dma_lkey;

			total_len += sge->length;
			sge++;
			sg_cnt--;
			offset = 0;
		}

		rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
			&ctx->map.wrs[i + 1].wr : NULL;
	}

	ctx->type = RDMA_RW_MULTI_WR;
	return ctx->nr_ops;

out_free_sges:
	kfree(ctx->map.sges);
out:
	return -ENOMEM;
}

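/*
 * Fast path for transfers that fit in a single scatterlist entry: one RDMA
 * READ/WRITE WR with a single SGE pointing at the DMA-mapped buffer. Always
 * returns 1, the number of WRs used.
 */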
static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_rdma_wr *rdma_wr = &ctx->single.wr;

	ctx->nr_ops = 1;

	ctx->single.sge.lkey = qp->pd->local_dma_lkey;
	ctx->single.sge.addr = sg_dma_address(sg) + offset;
	ctx->single.sge.length = sg_dma_len(sg) - offset;

	memset(rdma_wr, 0, sizeof(*rdma_wr));
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	rdma_wr->wr.sg_list = &ctx->single.sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;

	ctx->type = RDMA_RW_SINGLE_WR;
	return 1;
}

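/*
 * Undo rdma_rw_map_sg(), using the PCI peer-to-peer DMA unmap path when the
 * scatterlist points at P2PDMA pages.
 */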
static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
			     u32 sg_cnt, enum dma_data_direction dir)
{
	if (is_pci_p2pdma_page(sg_page(sg)))
		pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
	else
		ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
}

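/*
 * DMA-map the scatterlist for the device, handling PCI peer-to-peer DMA
 * pages separately. Returns the number of mapped entries, or 0 on failure
 * (including the unsupported combination of P2PDMA pages with a device that
 * uses virtual DMA addressing).
 */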
static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
			  u32 sg_cnt, enum dma_data_direction dir)
{
	if (is_pci_p2pdma_page(sg_page(sg))) {
		if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
			return 0;
		return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
	}
	return ib_dma_map_sg(dev, sg, sg_cnt, dir);
}

/**
 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
 * @ctx: context to initialize
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @sg_offset: current byte offset into @sg
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey: remote key to operate on
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	int ret;

	ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;

	/*
	 * Skip to the S/G entry that sg_offset falls into:
	 */
	for (;;) {
		u32 len = sg_dma_len(sg);

		if (sg_offset < len)
			break;

		sg = sg_next(sg);
		sg_offset -= len;
		sg_cnt--;
	}

	ret = -EIO;
	if (WARN_ON_ONCE(sg_cnt == 0))
		goto out_unmap_sg;

	if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
		ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
				sg_offset, remote_addr, rkey, dir);
	} else if (sg_cnt > 1) {
		ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
				remote_addr, rkey, dir);
	} else {
		ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
				remote_addr, rkey, dir);
	}

	if (ret < 0)
		goto out_unmap_sg;
	return ret;

out_unmap_sg:
	rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);

/**
 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 * @ctx: context to initialize
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @prot_sg: scatterlist to READ/WRITE protection information from/to
 * @prot_sg_cnt: number of entries in @prot_sg
 * @sig_attrs: signature offloading algorithms
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey: remote key to operate on
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		struct ib_sig_attrs *sig_attrs,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	struct ib_rdma_wr *rdma_wr;
	int count = 0, ret;

	if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
		pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n",
		       sg_cnt, prot_sg_cnt, pages_per_mr);
		return -EINVAL;
	}

	ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;

	if (prot_sg_cnt) {
		ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir);
		if (!ret) {
			ret = -ENOMEM;
			goto out_unmap_sg;
		}
		prot_sg_cnt = ret;
	}

	ctx->type = RDMA_RW_SIG_MR;
	ctx->nr_ops = 1;
	ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg) {
		ret = -ENOMEM;
		goto out_unmap_prot_sg;
	}

	ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
	if (!ctx->reg->mr) {
		ret = -EAGAIN;
		goto out_free_ctx;
	}

	count += rdma_rw_inv_key(ctx->reg);

	memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));

	ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg,
			      prot_sg_cnt, NULL, SZ_4K);
	if (unlikely(ret)) {
		pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt);
		goto out_destroy_sig_mr;
	}

	ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
	ctx->reg->reg_wr.wr.wr_cqe = NULL;
	ctx->reg->reg_wr.wr.num_sge = 0;
	ctx->reg->reg_wr.wr.send_flags = 0;
	ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
	if (rdma_protocol_iwarp(qp->device, port_num))
		ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
	ctx->reg->reg_wr.mr = ctx->reg->mr;
	ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
	count++;

	ctx->reg->sge.addr = ctx->reg->mr->iova;
	ctx->reg->sge.length = ctx->reg->mr->length;
	if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
		ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;

	rdma_wr = &ctx->reg->wr;
	rdma_wr->wr.sg_list = &ctx->reg->sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
	count++;

	return count;

out_destroy_sig_mr:
	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
	kfree(ctx->reg);
out_unmap_prot_sg:
	if (prot_sg_cnt)
		rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
out_unmap_sg:
	rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);

/*
 * Now that we are going to post the WRs we can update the lkey and need_inval
 * state on the MRs. If we were doing this at init time, we would get double
 * or missing invalidations if a context was initialized but not actually
 * posted.
 */
static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
{
	reg->mr->need_inval = need_inval;
	ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
	reg->reg_wr.key = reg->mr->lkey;
	reg->sge.lkey = reg->mr->lkey;
}

/**
 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
 * @ctx: context to operate on
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @cqe: completion queue entry for the last WR
 * @chain_wr: WR to append to the posted chain
 *
 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 * @ctx, as well as any memory registration operations needed. If @chain_wr
 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 * If @chain_wr is not set @cqe must be set so that the caller gets a
 * completion notification.
 */
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr, *last_wr;
	int i;

	switch (ctx->type) {
	case RDMA_RW_SIG_MR:
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++) {
			rdma_rw_update_lkey(&ctx->reg[i],
				ctx->reg[i].wr.wr.opcode !=
					IB_WR_RDMA_READ_WITH_INV);
		}

		if (ctx->reg[0].inv_wr.next)
			first_wr = &ctx->reg[0].inv_wr;
		else
			first_wr = &ctx->reg[0].reg_wr.wr;
		last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
		break;
	case RDMA_RW_MULTI_WR:
		first_wr = &ctx->map.wrs[0].wr;
		last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
		break;
	case RDMA_RW_SINGLE_WR:
		first_wr = &ctx->single.wr.wr;
		last_wr = &ctx->single.wr.wr;
		break;
	default:
		BUG();
	}

	if (chain_wr) {
		last_wr->next = chain_wr;
	} else {
		last_wr->wr_cqe = cqe;
		last_wr->send_flags |= IB_SEND_SIGNALED;
	}

	return first_wr;
}
EXPORT_SYMBOL(rdma_rw_ctx_wrs);

/**
 * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
 * @ctx: context to operate on
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @cqe: completion queue entry for the last WR
 * @chain_wr: WR to append to the posted chain
 *
 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 * any memory registration operations needed. If @chain_wr is non-NULL the
 * WR it points to will be appended to the chain of WRs posted. If @chain_wr
 * is not set @cqe must be set so that the caller gets a completion
 * notification.
 */
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr;

	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
	return ib_post_send(qp, first_wr, NULL);
}
EXPORT_SYMBOL(rdma_rw_ctx_post);
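
/*
 * Typical caller flow (illustrative sketch only, not taken from an in-tree
 * ULP; sgl, sg_cnt, remote_addr, rkey and ulp_cqe stand in for the caller's
 * own state, and error handling is omitted):
 *
 *	struct rdma_rw_ctx ctx;
 *	int ret;
 *
 *	ret = rdma_rw_ctx_init(&ctx, qp, port_num, sgl, sg_cnt, 0,
 *			       remote_addr, rkey, DMA_FROM_DEVICE);
 *	if (ret < 0)
 *		return ret;
 *	ret = rdma_rw_ctx_post(&ctx, qp, port_num, &ulp_cqe, NULL);
 *
 *	once the completion for the last WR has been reaped:
 *	rdma_rw_ctx_destroy(&ctx, qp, port_num, sgl, sg_cnt, DMA_FROM_DEVICE);
 */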

/**
 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 * @ctx: context to release
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
			 struct scatterlist *sg, u32 sg_cnt,
			 enum dma_data_direction dir)
{
	int i;

	switch (ctx->type) {
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++)
			ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
		kfree(ctx->reg);
		break;
	case RDMA_RW_MULTI_WR:
		kfree(ctx->map.wrs);
		kfree(ctx->map.sges);
		break;
	case RDMA_RW_SINGLE_WR:
		break;
	default:
		BUG();
		break;
	}

	rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);

/**
 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 *	rdma_rw_ctx_signature_init
 * @ctx: context to release
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @prot_sg: scatterlist that was used for the READ/WRITE of the PI
 * @prot_sg_cnt: number of entries in @prot_sg
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		enum dma_data_direction dir)
{
	if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
		return;

	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
	kfree(ctx->reg);

	if (prot_sg_cnt)
		rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
	rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);

/**
 * rdma_rw_mr_factor - return number of MRs required for a payload
 * @device: device handling the connection
 * @port_num: port num to which the connection is bound
 * @maxpages: maximum payload pages per rdma_rw_ctx
 *
 * Returns the number of MRs the device requires to move @maxpages pages of
 * payload. The returned value is used during transport creation to compute
 * max_rdma_ctxs and the size of the transport's Send and Send Completion
 * Queues.
 */
unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
			       unsigned int maxpages)
{
	unsigned int mr_pages;

	if (rdma_rw_can_use_mr(device, port_num))
		mr_pages = rdma_rw_fr_page_list_len(device, false);
	else
		mr_pages = device->attrs.max_sge_rd;
	return DIV_ROUND_UP(maxpages, mr_pages);
}
EXPORT_SYMBOL(rdma_rw_mr_factor);

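/*
 * Called at QP creation time to scale up max_send_wr so that there is room
 * for the READ/WRITE (and, where needed, registration and invalidation) WRs
 * of max_rdma_ctxs concurrent rdma_rw contexts, clamped to what the device
 * supports.
 */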
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
	u32 factor;

	WARN_ON_ONCE(attr->port_num == 0);

	/*
	 * Each context needs at least one RDMA READ or WRITE WR.
	 *
	 * For some hardware we might need more, eventually we should ask the
	 * HCA driver for a multiplier here.
	 */
	factor = 1;

	/*
	 * If the device needs MRs to perform RDMA READ or WRITE operations,
	 * we'll need two additional WRs per context for the registration and
	 * the invalidation.
	 */
	if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
	    rdma_rw_can_use_mr(dev, attr->port_num))
		factor += 2;	/* inv + reg */

	attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;

	/*
	 * But maybe we were just too high in the sky and the device doesn't
	 * even support all we need, and we'll have to live with what we get..
	 */
	attr->cap.max_send_wr =
		min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
}

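/*
 * Allocate the per-QP MR pools backing rdma_rw contexts: IB_MR_TYPE_MEM_REG
 * MRs for regular registrations and, for QPs created with
 * IB_QP_CREATE_INTEGRITY_EN, an additional pool of IB_MR_TYPE_INTEGRITY MRs.
 */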
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
	struct ib_device *dev = qp->pd->device;
	u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
	int ret = 0;

	if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
		nr_sig_mrs = attr->cap.max_rdma_ctxs;
		nr_mrs = attr->cap.max_rdma_ctxs;
		max_num_sg = rdma_rw_fr_page_list_len(dev, true);
	} else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
		nr_mrs = attr->cap.max_rdma_ctxs;
		max_num_sg = rdma_rw_fr_page_list_len(dev, false);
	}

	if (nr_mrs) {
		ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
				IB_MR_TYPE_MEM_REG,
				max_num_sg, 0);
		if (ret) {
			pr_err("%s: failed to allocate %d MRs\n",
				__func__, nr_mrs);
			return ret;
		}
	}

	if (nr_sig_mrs) {
		ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
				IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
		if (ret) {
			pr_err("%s: failed to allocate %d SIG MRs\n",
				__func__, nr_sig_mrs);
			goto out_free_rdma_mrs;
		}
	}

	return 0;

out_free_rdma_mrs:
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
	return ret;
}

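/*
 * Tear down the MR pools created by rdma_rw_init_mrs() when the QP is
 * destroyed.
 */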
void rdma_rw_cleanup_mrs(struct ib_qp *qp)
{
	ib_mr_pool_destroy(qp, &qp->sig_mrs);
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
}