// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/llist.h>
#include <asm/barrier.h>
#include <net/tcp.h>

#include "siw.h"
#include "siw_verbs.h"
#include "siw_mem.h"

static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
	[SIW_QP_STATE_IDLE] = "IDLE",
	[SIW_QP_STATE_RTR] = "RTR",
	[SIW_QP_STATE_RTS] = "RTS",
	[SIW_QP_STATE_CLOSING] = "CLOSING",
	[SIW_QP_STATE_TERMINATE] = "TERMINATE",
	[SIW_QP_STATE_ERROR] = "ERROR"
};

/*
 * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
 * per-RDMAP message basis. Please keep the order of the initializers. All
 * MPA lengths are initialized to the minimum packet size.
 */
struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
	{ /* RDMAP_RDMA_WRITE */
	  .hdr_len = sizeof(struct iwarp_rdma_write),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_WRITE),
	  .rx_data = siw_proc_write },
	{ /* RDMAP_RDMA_READ_REQ */
	  .hdr_len = sizeof(struct iwarp_rdma_rreq),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_REQ),
	  .rx_data = siw_proc_rreq },
	{ /* RDMAP_RDMA_READ_RESP */
	  .hdr_len = sizeof(struct iwarp_rdma_rresp),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_RESP),
	  .rx_data = siw_proc_rresp },
	{ /* RDMAP_SEND */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_TERMINATE */
	  .hdr_len = sizeof(struct iwarp_terminate),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_TERMINATE),
	  .rx_data = siw_proc_terminate }
};

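/*
 * TCP socket data_ready callback: reads and processes inbound
 * iWARP data directly from the socket callback context, if the
 * QP is in RTS and reception is not suspended.
 */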
void siw_qp_llp_data_ready(struct sock *sk)
{
	struct siw_qp *qp;

	read_lock(&sk->sk_callback_lock);

	if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
		goto done;

	qp = sk_to_qp(sk);

	if (likely(!qp->rx_stream.rx_suspend &&
		   down_read_trylock(&qp->state_lock))) {
		read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };

		if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
			/*
			 * Implements data receive operation during
			 * socket callback. TCP gracefully catches
			 * the case where there is nothing to receive
			 * (not calling siw_tcp_rx_data() then).
			 */
			tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);

		up_read(&qp->state_lock);
	} else {
		siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
			   qp->rx_stream.rx_suspend);
	}
done:
	read_unlock(&sk->sk_callback_lock);
}

void siw_qp_llp_close(struct siw_qp *qp)
{
	siw_dbg_qp(qp, "enter llp close, state = %s\n",
		   siw_qp_state_to_string[qp->attrs.state]);

	down_write(&qp->state_lock);

	qp->rx_stream.rx_suspend = 1;
	qp->tx_ctx.tx_suspend = 1;
	qp->attrs.sk = NULL;

	switch (qp->attrs.state) {
	case SIW_QP_STATE_RTS:
	case SIW_QP_STATE_RTR:
	case SIW_QP_STATE_IDLE:
	case SIW_QP_STATE_TERMINATE:
		qp->attrs.state = SIW_QP_STATE_ERROR;
		break;
	/*
	 * SIW_QP_STATE_CLOSING:
	 *
	 * This is a forced close. Shall the QP be moved to
	 * ERROR or IDLE?
	 */
	case SIW_QP_STATE_CLOSING:
		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
			qp->attrs.state = SIW_QP_STATE_ERROR;
		else
			qp->attrs.state = SIW_QP_STATE_IDLE;
		break;

	default:
		siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
			   siw_qp_state_to_string[qp->attrs.state]);
		break;
	}
	siw_sq_flush(qp);
	siw_rq_flush(qp);

	/*
	 * Dereference closing CEP
	 */
	if (qp->cep) {
		siw_cep_put(qp->cep);
		qp->cep = NULL;
	}

	up_write(&qp->state_lock);

	siw_dbg_qp(qp, "llp close exit: state %s\n",
		   siw_qp_state_to_string[qp->attrs.state]);
}

/*
 * Socket callback routine informing about newly available send space.
 * Function schedules SQ work for processing SQ items.
 */
void siw_qp_llp_write_space(struct sock *sk)
{
	struct siw_cep *cep;

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (cep) {
		cep->sk_write_space(sk);

		if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
			(void)siw_sq_start(cep->qp);
	}

	read_unlock(&sk->sk_callback_lock);
}

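/*
 * Allocate the inbound (IRQ) and outbound (ORQ) READ request queues.
 * Queue sizes are rounded up to the next power of two; a size of
 * zero leaves the corresponding queue unallocated.
 */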
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
	if (irq_size) {
		irq_size = roundup_pow_of_two(irq_size);
		qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
		if (!qp->irq) {
			qp->attrs.irq_size = 0;
			return -ENOMEM;
		}
	}
	if (orq_size) {
		orq_size = roundup_pow_of_two(orq_size);
		qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
		if (!qp->orq) {
			qp->attrs.orq_size = 0;
			qp->attrs.irq_size = 0;
			vfree(qp->irq);
			return -ENOMEM;
		}
	}
	qp->attrs.irq_size = irq_size;
	qp->attrs.orq_size = orq_size;
	siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
	return 0;
}

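/*
 * Allocate and initialize the per-QP transmit and receive MPA CRC
 * shash descriptors, based on the device-global siw_crypto_shash
 * transform.
 */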
static int siw_qp_enable_crc(struct siw_qp *qp)
{
	struct siw_rx_stream *c_rx = &qp->rx_stream;
	struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
	int size;

	if (siw_crypto_shash == NULL)
		return -ENOENT;

	size = crypto_shash_descsize(siw_crypto_shash) +
	       sizeof(struct shash_desc);

	c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
		kfree(c_tx->mpa_crc_hd);
		kfree(c_rx->mpa_crc_hd);
		c_tx->mpa_crc_hd = NULL;
		c_rx->mpa_crc_hd = NULL;
		return -ENOMEM;
	}
	c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
	c_rx->mpa_crc_hd->tfm = siw_crypto_shash;

	return 0;
}

/*
 * Send a non-signalled READ or WRITE to the peer side as negotiated
 * with the MPAv2 P2P setup protocol. The work request is only created
 * as a current active WR and does not consume Send Queue space.
 *
 * Caller must hold QP state lock.
 */
int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
{
	struct siw_wqe *wqe = tx_wqe(qp);
	unsigned long flags;
	int rv = 0;

	spin_lock_irqsave(&qp->sq_lock, flags);

	if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
		spin_unlock_irqrestore(&qp->sq_lock, flags);
		return -EIO;
	}
	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);

	wqe->wr_status = SIW_WR_QUEUED;
	wqe->sqe.flags = 0;
	wqe->sqe.num_sge = 1;
	wqe->sqe.sge[0].length = 0;
	wqe->sqe.sge[0].laddr = 0;
	wqe->sqe.sge[0].lkey = 0;
	/*
	 * While it must not be checked for inbound zero length
	 * READ/WRITE, some HW may treat STag 0 as special.
	 */
	wqe->sqe.rkey = 1;
	wqe->sqe.raddr = 0;
	wqe->processed = 0;

	if (ctrl & MPA_V2_RDMA_WRITE_RTR)
		wqe->sqe.opcode = SIW_OP_WRITE;
	else if (ctrl & MPA_V2_RDMA_READ_RTR) {
		struct siw_sqe *rreq = NULL;

		wqe->sqe.opcode = SIW_OP_READ;

		spin_lock(&qp->orq_lock);

		if (qp->attrs.orq_size)
			rreq = orq_get_free(qp);
		if (rreq) {
			siw_read_to_orq(rreq, &wqe->sqe);
			qp->orq_put++;
		} else
			rv = -EIO;

		spin_unlock(&qp->orq_lock);
	} else
		rv = -EINVAL;

	if (rv)
		wqe->wr_status = SIW_WR_IDLE;

	spin_unlock_irqrestore(&qp->sq_lock, flags);

	if (!rv)
		rv = siw_sq_start(qp);

	return rv;
}

/*
 * Map memory access error to DDP tagged error
 */
enum ddp_ecode siw_tagged_error(enum siw_access_state state)
{
	switch (state) {
	case E_STAG_INVALID:
		return DDP_ECODE_T_INVALID_STAG;
	case E_BASE_BOUNDS:
		return DDP_ECODE_T_BASE_BOUNDS;
	case E_PD_MISMATCH:
		return DDP_ECODE_T_STAG_NOT_ASSOC;
	case E_ACCESS_PERM:
		/*
		 * RFC 5041 (DDP) lacks an ecode for insufficient access
		 * permissions. 'Invalid STag' seems to be the closest
		 * match though.
		 */
		return DDP_ECODE_T_INVALID_STAG;
	default:
		WARN_ON(1);
		return DDP_ECODE_T_INVALID_STAG;
	}
}

/*
 * Map memory access error to RDMAP protection error
 */
enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
{
	switch (state) {
	case E_STAG_INVALID:
		return RDMAP_ECODE_INVALID_STAG;
	case E_BASE_BOUNDS:
		return RDMAP_ECODE_BASE_BOUNDS;
	case E_PD_MISMATCH:
		return RDMAP_ECODE_STAG_NOT_ASSOC;
	case E_ACCESS_PERM:
		return RDMAP_ECODE_ACCESS_RIGHTS;
	default:
		return RDMAP_ECODE_UNSPECIFIED;
	}
}

void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
			u8 ecode, int in_tx)
{
	if (!qp->term_info.valid) {
		memset(&qp->term_info, 0, sizeof(qp->term_info));
		qp->term_info.layer = layer;
		qp->term_info.etype = etype;
		qp->term_info.ecode = ecode;
		qp->term_info.in_tx = in_tx;
		qp->term_info.valid = 1;
	}
	siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
		   layer, etype, ecode, in_tx ? "yes" : "no");
}

/*
 * Send a TERMINATE message, as defined in RFCs 5040/5041/5044/6581.
 * Sending TERMINATE messages is best effort - such messages
 * can only be sent if the QP is still connected and it does
 * not have another outbound message in progress, i.e. the
 * TERMINATE message must not interfere with an incomplete current
 * transmit operation.
 */
void siw_send_terminate(struct siw_qp *qp)
{
	struct kvec iov[3];
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
	struct iwarp_terminate *term = NULL;
	union iwarp_hdr *err_hdr = NULL;
	struct socket *s = qp->attrs.sk;
	struct siw_rx_stream *srx = &qp->rx_stream;
	union iwarp_hdr *rx_hdr = &srx->hdr;
	u32 crc = 0;
	int num_frags, len_terminate, rv;

	if (!qp->term_info.valid)
		return;

	qp->term_info.valid = 0;

	if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
		siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
			   tx_type(tx_wqe(qp)));
		return;
	}
	if (!s && qp->cep)
		/* QP not yet in RTS. Take socket from connection end point */
		s = qp->cep->sock;

	if (!s) {
		siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
		return;
	}

	term = kzalloc(sizeof(*term), GFP_KERNEL);
	if (!term)
		return;

	term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
	term->ddp_mo = 0;
	term->ddp_msn = cpu_to_be32(1);

	iov[0].iov_base = term;
	iov[0].iov_len = sizeof(*term);

	if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
	    ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
	     (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
		err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
		if (!err_hdr) {
			kfree(term);
			return;
		}
	}
	memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
	       sizeof(struct iwarp_ctrl));

	__rdmap_term_set_layer(term, qp->term_info.layer);
	__rdmap_term_set_etype(term, qp->term_info.etype);
	__rdmap_term_set_ecode(term, qp->term_info.ecode);

	switch (qp->term_info.layer) {
	case TERM_ERROR_LAYER_RDMAP:
		if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
			/* No additional DDP/RDMAP header to be included */
			break;

		if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
			/*
			 * Complete RDMAP frame will get attached, and
			 * DDP segment length is valid
			 */
			term->flag_m = 1;
			term->flag_d = 1;
			term->flag_r = 1;

			if (qp->term_info.in_tx) {
				struct iwarp_rdma_rreq *rreq;
				struct siw_wqe *wqe = tx_wqe(qp);

				/* Inbound RREQ error, detected during
				 * RRESP creation. Take state from
				 * current TX work queue element to
				 * reconstruct the peer's RREQ.
				 */
				rreq = (struct iwarp_rdma_rreq *)err_hdr;

				memcpy(&rreq->ctrl,
				       &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
				       sizeof(struct iwarp_ctrl));

				rreq->rsvd = 0;
				rreq->ddp_qn =
					htonl(RDMAP_UNTAGGED_QN_RDMA_READ);

				/* Provide RREQ's MSN as kept aside */
				rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);

				rreq->ddp_mo = htonl(wqe->processed);
				rreq->sink_stag = htonl(wqe->sqe.rkey);
				rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
				rreq->read_size = htonl(wqe->sqe.sge[0].length);
				rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
				rreq->source_to =
					cpu_to_be64(wqe->sqe.sge[0].laddr);

				iov[1].iov_base = rreq;
				iov[1].iov_len = sizeof(*rreq);

				rx_hdr = (union iwarp_hdr *)rreq;
			} else {
				/* Take RDMAP/DDP information from
				 * current (failed) inbound frame.
				 */
				iov[1].iov_base = rx_hdr;

				if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
				    RDMAP_RDMA_READ_REQ)
					iov[1].iov_len =
						sizeof(struct iwarp_rdma_rreq);
				else /* SEND type */
					iov[1].iov_len =
						sizeof(struct iwarp_send);
			}
		} else {
			/* Do not report DDP hdr information if packet
			 * layout is unknown
			 */
			if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
			    (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
				break;

			iov[1].iov_base = rx_hdr;

			/* Only DDP frame will get attached */
			if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
				iov[1].iov_len =
					sizeof(struct iwarp_rdma_write);
			else
				iov[1].iov_len = sizeof(struct iwarp_send);

			term->flag_m = 1;
			term->flag_d = 1;
		}
		term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
		break;

	case TERM_ERROR_LAYER_DDP:
		/* Report error encountered while DDP processing.
		 * This can only happen as a result of inbound
		 * DDP processing
		 */

		/* Do not report DDP hdr information if packet
		 * layout is unknown
		 */
		if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
		     (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
		    ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
		     (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
			break;

		iov[1].iov_base = rx_hdr;

		if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
			iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
		else
			iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);

		term->flag_m = 1;
		term->flag_d = 1;
		break;

	default:
		break;
	}
	if (term->flag_m || term->flag_d || term->flag_r) {
		iov[2].iov_base = &crc;
		iov[2].iov_len = sizeof(crc);
		len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
		num_frags = 3;
	} else {
		iov[1].iov_base = &crc;
		iov[1].iov_len = sizeof(crc);
		len_terminate = sizeof(*term) + MPA_CRC_SIZE;
		num_frags = 2;
	}

	/* Adjust DDP Segment Length parameter, if valid */
	if (term->flag_m) {
		u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
		enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);

		real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
		rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
	}

	term->ctrl.mpa_len =
		cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
	if (qp->tx_ctx.mpa_crc_hd) {
		crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
		if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
					(u8 *)iov[0].iov_base,
					iov[0].iov_len))
			goto out;

		if (num_frags == 3) {
			if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
						(u8 *)iov[1].iov_base,
						iov[1].iov_len))
				goto out;
		}
		crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
	}

	rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
	siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
		   rv == len_terminate ? "success" : "failure",
		   __rdmap_term_layer(term), __rdmap_term_etype(term),
		   __rdmap_term_ecode(term), rv);
out:
	kfree(term);
	kfree(err_hdr);
}

/*
 * Handle all attrs other than state
 */
static void siw_qp_modify_nonstate(struct siw_qp *qp,
				   struct siw_qp_attrs *attrs,
				   enum siw_qp_attr_mask mask)
{
	if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
		if (attrs->flags & SIW_RDMA_BIND_ENABLED)
			qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;

		if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
			qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;

		if (attrs->flags & SIW_RDMA_READ_ENABLED)
			qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
	}
}

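/*
 * Handle state transitions out of IDLE or RTR. Moving to RTS
 * requires a connected socket and negotiated MPA parameters.
 */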
static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
				      struct siw_qp_attrs *attrs,
				      enum siw_qp_attr_mask mask)
{
	int rv = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_RTS:
		if (attrs->flags & SIW_MPA_CRC) {
			rv = siw_qp_enable_crc(qp);
			if (rv)
				break;
		}
		if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
			siw_dbg_qp(qp, "no socket\n");
			rv = -EINVAL;
			break;
		}
		if (!(mask & SIW_QP_ATTR_MPA)) {
			siw_dbg_qp(qp, "no MPA\n");
			rv = -EINVAL;
			break;
		}
		/*
		 * Initialize iWARP TX state
		 */
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;

		/*
		 * Initialize iWARP RX state
		 */
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;

		/*
		 * init IRD free queue, caller has already checked
		 * limits.
		 */
		rv = siw_qp_readq_init(qp, attrs->irq_size,
				       attrs->orq_size);
		if (rv)
			break;

		qp->attrs.sk = attrs->sk;
		qp->attrs.state = SIW_QP_STATE_RTS;

		siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
			   attrs->flags & SIW_MPA_CRC ? "y" : "n",
			   qp->attrs.orq_size, qp->attrs.irq_size);
		break;

	case SIW_QP_STATE_ERROR:
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;
		if (qp->cep) {
			siw_cep_put(qp->cep);
			qp->cep = NULL;
		}
		break;

	default:
		break;
	}
	return rv;
}

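/*
 * Handle state transitions out of RTS. Returns 1 if the caller
 * must drop the connection, 0 otherwise.
 */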
static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
				     struct siw_qp_attrs *attrs)
{
	int drop_conn = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_CLOSING:
		/*
		 * Verbs: move to IDLE if SQ and ORQ are empty.
		 * Move to ERROR otherwise. But first of all we must
		 * close the connection. So we keep CLOSING or ERROR
		 * as a transient state, schedule connection drop work
		 * and wait for the socket state change upcall to
		 * come back closed.
		 */
		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
			qp->attrs.state = SIW_QP_STATE_CLOSING;
		} else {
			qp->attrs.state = SIW_QP_STATE_ERROR;
			siw_sq_flush(qp);
		}
		siw_rq_flush(qp);

		drop_conn = 1;
		break;

	case SIW_QP_STATE_TERMINATE:
		qp->attrs.state = SIW_QP_STATE_TERMINATE;

		siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
				   RDMAP_ETYPE_CATASTROPHIC,
				   RDMAP_ECODE_UNSPECIFIED, 1);
		drop_conn = 1;
		break;

	case SIW_QP_STATE_ERROR:
		/*
		 * This is an emergency close.
		 *
		 * Any in progress transmit operation will get
		 * cancelled.
		 * This will likely result in a protocol failure,
		 * if a TX operation is in transit. The caller
		 * could unconditionally wait to give the current
		 * operation a chance to complete.
		 * Esp., how to handle the non-empty IRQ case?
		 * The peer was asking for data transfer at a valid
		 * point in time.
		 */
		siw_sq_flush(qp);
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;
		drop_conn = 1;
		break;

	default:
		break;
	}
	return drop_conn;
}

static void siw_qp_nextstate_from_term(struct siw_qp *qp,
				       struct siw_qp_attrs *attrs)
{
	switch (attrs->state) {
	case SIW_QP_STATE_ERROR:
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;

		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
			siw_sq_flush(qp);
		break;

	default:
		break;
	}
}

static int siw_qp_nextstate_from_close(struct siw_qp *qp,
				       struct siw_qp_attrs *attrs)
{
	int rv = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_IDLE:
		WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
		qp->attrs.state = SIW_QP_STATE_IDLE;
		break;

	case SIW_QP_STATE_CLOSING:
		/*
		 * The LLP may have already moved the QP to closing
		 * due to graceful peer close init
		 */
		break;

	case SIW_QP_STATE_ERROR:
		/*
		 * QP was moved to CLOSING by LLP event
		 * not yet seen by user.
		 */
		qp->attrs.state = SIW_QP_STATE_ERROR;

		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
			siw_sq_flush(qp);

		siw_rq_flush(qp);
		break;

	default:
		siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
			   siw_qp_state_to_string[qp->attrs.state],
			   siw_qp_state_to_string[attrs->state]);

		rv = -ECONNABORTED;
	}
	return rv;
}

/*
 * Caller must hold qp->state_lock
 */
int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
		  enum siw_qp_attr_mask mask)
{
	int drop_conn = 0, rv = 0;

	if (!mask)
		return 0;

	siw_dbg_qp(qp, "state: %s => %s\n",
		   siw_qp_state_to_string[qp->attrs.state],
		   siw_qp_state_to_string[attrs->state]);

	if (mask != SIW_QP_ATTR_STATE)
		siw_qp_modify_nonstate(qp, attrs, mask);

	if (!(mask & SIW_QP_ATTR_STATE))
		return 0;

	switch (qp->attrs.state) {
	case SIW_QP_STATE_IDLE:
	case SIW_QP_STATE_RTR:
		rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
		break;

	case SIW_QP_STATE_RTS:
		drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
		break;

	case SIW_QP_STATE_TERMINATE:
		siw_qp_nextstate_from_term(qp, attrs);
		break;

	case SIW_QP_STATE_CLOSING:
		siw_qp_nextstate_from_close(qp, attrs);
		break;
	default:
		break;
	}
	if (drop_conn)
		siw_qp_cm_drop(qp, 0);

	return rv;
}

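/*
 * Record an outbound READ request in a free ORQ entry, so the
 * corresponding READ response can be matched on completion.
 */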
void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
{
	rreq->id = sqe->id;
	rreq->opcode = sqe->opcode;
	rreq->sge[0].laddr = sqe->sge[0].laddr;
	rreq->sge[0].length = sqe->sge[0].length;
	rreq->sge[0].lkey = sqe->sge[0].lkey;
	rreq->sge[1].lkey = sqe->sge[1].lkey;
	rreq->flags = sqe->flags | SIW_WQE_VALID;
	rreq->num_sge = 1;
}

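/*
 * Fetch the next SQE from the Send Queue and set up the current
 * transmit WQE. Returns 1 if a WQE is ready for transmission,
 * 0 if the SQ is empty or processing is fenced until outstanding
 * READs complete, and a negative value on error.
 */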
static int siw_activate_tx_from_sq(struct siw_qp *qp)
{
	struct siw_sqe *sqe;
	struct siw_wqe *wqe = tx_wqe(qp);
	int rv = 1;

	sqe = sq_get_next(qp);
	if (!sqe)
		return 0;

	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
	wqe->wr_status = SIW_WR_QUEUED;

	/* First copy SQE to kernel private memory */
	memcpy(&wqe->sqe, sqe, sizeof(*sqe));

	if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
		rv = -EINVAL;
		goto out;
	}
	if (wqe->sqe.flags & SIW_WQE_INLINE) {
		if (wqe->sqe.opcode != SIW_OP_SEND &&
		    wqe->sqe.opcode != SIW_OP_WRITE) {
			rv = -EINVAL;
			goto out;
		}
		if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
			rv = -EINVAL;
			goto out;
		}
		wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
		wqe->sqe.sge[0].lkey = 0;
		wqe->sqe.num_sge = 1;
	}
	if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
		/* A READ cannot be fenced */
		if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
			     wqe->sqe.opcode ==
				     SIW_OP_READ_LOCAL_INV)) {
			siw_dbg_qp(qp, "cannot fence read\n");
			rv = -EINVAL;
			goto out;
		}
		spin_lock(&qp->orq_lock);

		if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
			qp->tx_ctx.orq_fence = 1;
			rv = 0;
		}
		spin_unlock(&qp->orq_lock);

	} else if (wqe->sqe.opcode == SIW_OP_READ ||
		   wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
		struct siw_sqe *rreq;

		if (unlikely(!qp->attrs.orq_size)) {
			/* We negotiated not to send READ req's */
			rv = -EINVAL;
			goto out;
		}
		wqe->sqe.num_sge = 1;

		spin_lock(&qp->orq_lock);

		rreq = orq_get_free(qp);
		if (rreq) {
			/*
			 * Make an immediate copy in ORQ to be ready
			 * to process loopback READ reply
			 */
			siw_read_to_orq(rreq, &wqe->sqe);
			qp->orq_put++;
		} else {
			qp->tx_ctx.orq_fence = 1;
			rv = 0;
		}
		spin_unlock(&qp->orq_lock);
	}

	/* Clear SQE, can be re-used by application */
	smp_store_mb(sqe->flags, 0);
	qp->sq_get++;
out:
	if (unlikely(rv < 0)) {
		siw_dbg_qp(qp, "error %d\n", rv);
		wqe->wr_status = SIW_WR_IDLE;
	}
	return rv;
}

/*
 * Must be called with SQ locked.
 * To avoid complete SQ starvation by constant inbound READ requests,
 * the active IRQ will not be served after qp->irq_burst, if the
 * SQ has pending work.
 */
int siw_activate_tx(struct siw_qp *qp)
{
	struct siw_sqe *irqe;
	struct siw_wqe *wqe = tx_wqe(qp);

	if (!qp->attrs.irq_size)
		return siw_activate_tx_from_sq(qp);

	irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];

	if (!(irqe->flags & SIW_WQE_VALID))
		return siw_activate_tx_from_sq(qp);

	/*
	 * Avoid local WQE processing starvation in case
	 * of constant inbound READ request stream
	 */
	if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
		qp->irq_burst = 0;
		return siw_activate_tx_from_sq(qp);
	}
	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
	wqe->wr_status = SIW_WR_QUEUED;

	/* start READ RESPONSE */
	wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
	wqe->sqe.flags = 0;
	if (irqe->num_sge) {
		wqe->sqe.num_sge = 1;
		wqe->sqe.sge[0].length = irqe->sge[0].length;
		wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
		wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
	} else {
		wqe->sqe.num_sge = 0;
	}

	/* Retain original RREQ's message sequence number for
	 * potential error reporting cases.
	 */
	wqe->sqe.sge[1].length = irqe->sge[1].length;

	wqe->sqe.rkey = irqe->rkey;
	wqe->sqe.raddr = irqe->raddr;

	wqe->processed = 0;
	qp->irq_get++;

	/* mark current IRQ entry free */
	smp_store_mb(irqe->flags, 0);

	return 1;
}

/*
 * Check if current CQ state qualifies for calling CQ completion
 * handler. Must be called with CQ lock held.
 */
static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
{
	u32 cq_notify;

	if (!cq->base_cq.comp_handler)
		return false;

	/* Read application shared notification state */
	cq_notify = READ_ONCE(cq->notify->flags);

	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
	     (flags & SIW_WQE_SOLICITED))) {
		/*
		 * CQ notification is one-shot: Since the
		 * current CQE causes user notification,
		 * the CQ gets disarmed and must be re-armed
		 * by the user for a new notification.
		 */
		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);

		return true;
	}
	return false;
}

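/*
 * Post a work completion for a Send Queue element to the QP's
 * send CQ, if present, and recycle the SQE for application reuse.
 */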
int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
		     enum siw_wc_status status)
{
	struct siw_cq *cq = qp->scq;
	int rv = 0;

	if (cq) {
		u32 sqe_flags = sqe->flags;
		struct siw_cqe *cqe;
		u32 idx;
		unsigned long flags;

		spin_lock_irqsave(&cq->lock, flags);

		idx = cq->cq_put % cq->num_cqe;
		cqe = &cq->queue[idx];

		if (!READ_ONCE(cqe->flags)) {
			bool notify;

			cqe->id = sqe->id;
			cqe->opcode = sqe->opcode;
			cqe->status = status;
			cqe->imm_data = 0;
			cqe->bytes = bytes;

			if (rdma_is_kernel_res(&cq->base_cq.res))
				cqe->base_qp = &qp->base_qp;
			else
				cqe->qp_id = qp_id(qp);

			/* mark CQE valid for application */
			WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
			/* recycle SQE */
			smp_store_mb(sqe->flags, 0);

			cq->cq_put++;
			notify = siw_cq_notify_now(cq, sqe_flags);

			spin_unlock_irqrestore(&cq->lock, flags);

			if (notify) {
				siw_dbg_cq(cq, "Call completion handler\n");
				cq->base_cq.comp_handler(&cq->base_cq,
						cq->base_cq.cq_context);
			}
		} else {
			spin_unlock_irqrestore(&cq->lock, flags);
			rv = -ENOMEM;
			siw_cq_event(cq, IB_EVENT_CQ_ERR);
		}
	} else {
		/* recycle SQE */
		smp_store_mb(sqe->flags, 0);
	}
	return rv;
}

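/*
 * Post a work completion for a Receive Queue element to the QP's
 * receive CQ, if present, and recycle the RQE for application reuse.
 */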
int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
		     u32 inval_stag, enum siw_wc_status status)
{
	struct siw_cq *cq = qp->rcq;
	int rv = 0;

	if (cq) {
		struct siw_cqe *cqe;
		u32 idx;
		unsigned long flags;

		spin_lock_irqsave(&cq->lock, flags);

		idx = cq->cq_put % cq->num_cqe;
		cqe = &cq->queue[idx];

		if (!READ_ONCE(cqe->flags)) {
			bool notify;
			u8 cqe_flags = SIW_WQE_VALID;

			cqe->id = rqe->id;
			cqe->opcode = SIW_OP_RECEIVE;
			cqe->status = status;
			cqe->imm_data = 0;
			cqe->bytes = bytes;

			if (rdma_is_kernel_res(&cq->base_cq.res)) {
				cqe->base_qp = &qp->base_qp;
				if (inval_stag) {
					cqe_flags |= SIW_WQE_REM_INVAL;
					cqe->inval_stag = inval_stag;
				}
			} else {
				cqe->qp_id = qp_id(qp);
			}
			/* mark CQE valid for application */
			WRITE_ONCE(cqe->flags, cqe_flags);
			/* recycle RQE */
			smp_store_mb(rqe->flags, 0);

			cq->cq_put++;
			notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);

			spin_unlock_irqrestore(&cq->lock, flags);

			if (notify) {
				siw_dbg_cq(cq, "Call completion handler\n");
				cq->base_cq.comp_handler(&cq->base_cq,
						cq->base_cq.cq_context);
			}
		} else {
			spin_unlock_irqrestore(&cq->lock, flags);
			rv = -ENOMEM;
			siw_cq_event(cq, IB_EVENT_CQ_ERR);
		}
	} else {
		/* recycle RQE */
		smp_store_mb(rqe->flags, 0);
	}
	return rv;
}

/*
 * siw_sq_flush()
 *
 * Flush SQ and ORQ entries to CQ.
 *
 * Must be called with QP state write lock held.
 * Therefore, SQ and ORQ lock must not be taken.
 */
void siw_sq_flush(struct siw_qp *qp)
{
	struct siw_sqe *sqe;
	struct siw_wqe *wqe = tx_wqe(qp);
	int async_event = 0;

	/*
	 * Start with completing any work currently on the ORQ
	 */
	while (qp->attrs.orq_size) {
		sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
		if (!READ_ONCE(sqe->flags))
			break;

		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			break;

		WRITE_ONCE(sqe->flags, 0);
		qp->orq_get++;
	}
	/*
	 * Flush an in-progress WQE if present
	 */
	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
			   tx_type(wqe), wqe->wr_status);

		siw_wqe_put_mem(wqe, tx_type(wqe));

		if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
		    ((tx_type(wqe) != SIW_OP_READ &&
		      tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
		     wqe->wr_status == SIW_WR_QUEUED))
			/*
			 * An in-progress Read Request is already in
			 * the ORQ
			 */
			siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
					 SIW_WC_WR_FLUSH_ERR);

		wqe->wr_status = SIW_WR_IDLE;
	}
	/*
	 * Flush the Send Queue
	 */
	while (qp->attrs.sq_size) {
		sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
		if (!READ_ONCE(sqe->flags))
			break;

		async_event = 1;
		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			/*
			 * Shall IB_EVENT_SQ_DRAINED be suppressed if work
			 * completion fails?
			 */
			break;

		WRITE_ONCE(sqe->flags, 0);
		qp->sq_get++;
	}
	if (async_event)
		siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
}

/*
 * siw_rq_flush()
 *
 * Flush recv queue entries to CQ. Also
 * takes care of pending active tagged and untagged
 * inbound transfers, which have target memory
 * referenced.
 *
 * Must be called with QP state write lock held.
 * Therefore, RQ lock must not be taken.
 */
void siw_rq_flush(struct siw_qp *qp)
{
	struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;

	/*
	 * Flush an in-progress untagged operation if present
	 */
	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
			   rx_type(wqe), wqe->wr_status);

		siw_wqe_put_mem(wqe, rx_type(wqe));

		if (rx_type(wqe) == SIW_OP_RECEIVE) {
			siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
					 0, SIW_WC_WR_FLUSH_ERR);
		} else if (rx_type(wqe) != SIW_OP_READ &&
			   rx_type(wqe) != SIW_OP_READ_RESPONSE &&
			   rx_type(wqe) != SIW_OP_WRITE) {
			siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
		}
		wqe->wr_status = SIW_WR_IDLE;
	}
	wqe = &qp->rx_tagged.wqe_active;

	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_wqe_put_mem(wqe, rx_type(wqe));
		wqe->wr_status = SIW_WR_IDLE;
	}
	/*
	 * Flush the Receive Queue
	 */
	while (qp->attrs.rq_size) {
		struct siw_rqe *rqe =
			&qp->recvq[qp->rq_get % qp->attrs.rq_size];

		if (!READ_ONCE(rqe->flags))
			break;

		if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			break;

		WRITE_ONCE(rqe->flags, 0);
		qp->rq_get++;
	}
}

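/*
 * Insert a new QP into the device's QP XArray and take the
 * initial QP reference.
 */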
int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
{
	int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b,
			  GFP_KERNEL);

	if (!rv) {
		kref_init(&qp->ref);
		qp->sdev = sdev;
		siw_dbg_qp(qp, "new QP\n");
	}
	return rv;
}

void siw_free_qp(struct kref *ref)
{
	struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
	struct siw_device *sdev = qp->sdev;
	unsigned long flags;

	if (qp->cep)
		siw_cep_put(qp->cep);

	found = xa_erase(&sdev->qp_xa, qp_id(qp));
	WARN_ON(found != qp);
	spin_lock_irqsave(&sdev->lock, flags);
	list_del(&qp->devq);
	spin_unlock_irqrestore(&sdev->lock, flags);

	vfree(qp->sendq);
	vfree(qp->recvq);
	vfree(qp->irq);
	vfree(qp->orq);

	siw_put_tx_cpu(qp->tx_cpu);

	atomic_dec(&sdev->num_qp);
	siw_dbg_qp(qp, "free QP\n");
	kfree_rcu(qp, rcu);
}