// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics TCP target.
 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/nvme-tcp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/inet.h>
#include <linux/llist.h>
#include <crypto/hash.h>

#include "nvmet.h"

#define NVMET_TCP_DEF_INLINE_DATA_SIZE  (4 * PAGE_SIZE)

/*
 * Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value is sufficient to indicate general consideration of any
 * possible optimization. Making it a module parameter allows for alternative
 * values that may be unique to some NIC implementations.
 */
static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");

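/*
 * Per-invocation budgets for nvmet_tcp_io_work(): at most
 * NVMET_TCP_RECV_BUDGET receive operations and NVMET_TCP_SEND_BUDGET send
 * operations per loop iteration, with the overall loop bounded by
 * NVMET_TCP_IO_WORK_BUDGET. Bounding the work done per work item keeps one
 * busy queue from monopolizing the workqueue shared by all queues.
 */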
#define NVMET_TCP_RECV_BUDGET           8
#define NVMET_TCP_SEND_BUDGET           8
#define NVMET_TCP_IO_WORK_BUDGET        64

enum nvmet_tcp_send_state {
        NVMET_TCP_SEND_DATA_PDU,
        NVMET_TCP_SEND_DATA,
        NVMET_TCP_SEND_R2T,
        NVMET_TCP_SEND_DDGST,
        NVMET_TCP_SEND_RESPONSE
};

enum nvmet_tcp_recv_state {
        NVMET_TCP_RECV_PDU,
        NVMET_TCP_RECV_DATA,
        NVMET_TCP_RECV_DDGST,
        NVMET_TCP_RECV_ERR,
};

enum {
        NVMET_TCP_F_INIT_FAILED = (1 << 0),
};

struct nvmet_tcp_cmd {
        struct nvmet_tcp_queue          *queue;
        struct nvmet_req                req;

        struct nvme_tcp_cmd_pdu         *cmd_pdu;
        struct nvme_tcp_rsp_pdu         *rsp_pdu;
        struct nvme_tcp_data_pdu        *data_pdu;
        struct nvme_tcp_r2t_pdu         *r2t_pdu;

        u32                             rbytes_done;
        u32                             wbytes_done;

        u32                             pdu_len;
        u32                             pdu_recv;
        int                             sg_idx;
        int                             nr_mapped;
        struct msghdr                   recv_msg;
        struct kvec                     *iov;
        u32                             flags;

        struct list_head                entry;
        struct llist_node               lentry;

        /* send state */
        u32                             offset;
        struct scatterlist              *cur_sg;
        enum nvmet_tcp_send_state       state;

        __le32                          exp_ddgst;
        __le32                          recv_ddgst;
};

enum nvmet_tcp_queue_state {
        NVMET_TCP_Q_CONNECTING,
        NVMET_TCP_Q_LIVE,
        NVMET_TCP_Q_DISCONNECTING,
};

struct nvmet_tcp_queue {
        struct socket           *sock;
        struct nvmet_tcp_port   *port;
        struct work_struct      io_work;
        struct nvmet_cq         nvme_cq;
        struct nvmet_sq         nvme_sq;

        /* send state */
        struct nvmet_tcp_cmd    *cmds;
        unsigned int            nr_cmds;
        struct list_head        free_list;
        struct llist_head       resp_list;
        struct list_head        resp_send_list;
        int                     send_list_len;
        struct nvmet_tcp_cmd    *snd_cmd;

        /* recv state */
        int                     offset;
        int                     left;
        enum nvmet_tcp_recv_state rcv_state;
        struct nvmet_tcp_cmd    *cmd;
        union nvme_tcp_pdu      pdu;

        /* digest state */
        bool                    hdr_digest;
        bool                    data_digest;
        struct ahash_request    *snd_hash;
        struct ahash_request    *rcv_hash;

        spinlock_t              state_lock;
        enum nvmet_tcp_queue_state state;

        struct sockaddr_storage sockaddr;
        struct sockaddr_storage sockaddr_peer;
        struct work_struct      release_work;

        int                     idx;
        struct list_head        queue_list;

        struct nvmet_tcp_cmd    connect;

        struct page_frag_cache  pf_cache;

        void (*data_ready)(struct sock *);
        void (*state_change)(struct sock *);
        void (*write_space)(struct sock *);
};

struct nvmet_tcp_port {
        struct socket           *sock;
        struct work_struct      accept_work;
        struct nvmet_port       *nport;
        struct sockaddr_storage addr;
        void (*data_ready)(struct sock *);
};

static DEFINE_IDA(nvmet_tcp_queue_ida);
static LIST_HEAD(nvmet_tcp_queue_list);
static DEFINE_MUTEX(nvmet_tcp_queue_mutex);

static struct workqueue_struct *nvmet_tcp_wq;
static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);

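/*
 * The transfer tag we hand out in R2T PDUs is simply the command's index
 * in the queue's cmds array. Before that array is allocated (i.e. while
 * only the pre-allocated connect command exists), 0xffff is used instead.
 */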
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
                struct nvmet_tcp_cmd *cmd)
{
        if (unlikely(!queue->nr_cmds)) {
                /* We didn't allocate cmds yet, send 0xffff */
                return USHRT_MAX;
        }

        return cmd - queue->cmds;
}

static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
{
        return nvme_is_write(cmd->req.cmd) &&
                cmd->rbytes_done < cmd->req.transfer_len;
}

static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
{
        return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status;
}

static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd)
{
        return !nvme_is_write(cmd->req.cmd) &&
                cmd->req.transfer_len > 0 &&
                !cmd->req.cqe->status;
}

static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd)
{
        return nvme_is_write(cmd->req.cmd) && cmd->pdu_len &&
                !cmd->rbytes_done;
}

static inline struct nvmet_tcp_cmd *
nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue)
{
        struct nvmet_tcp_cmd *cmd;

        cmd = list_first_entry_or_null(&queue->free_list,
                        struct nvmet_tcp_cmd, entry);
        if (!cmd)
                return NULL;
        list_del_init(&cmd->entry);

        cmd->rbytes_done = cmd->wbytes_done = 0;
        cmd->pdu_len = 0;
        cmd->pdu_recv = 0;
        cmd->iov = NULL;
        cmd->flags = 0;
        return cmd;
}

static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
{
        if (unlikely(cmd == &cmd->queue->connect))
                return;

        list_add_tail(&cmd->entry, &cmd->queue->free_list);
}

static inline int queue_cpu(struct nvmet_tcp_queue *queue)
{
        return queue->sock->sk->sk_incoming_cpu;
}

static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
{
        return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
{
        return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline void nvmet_tcp_hdgst(struct ahash_request *hash,
                void *pdu, size_t len)
{
        struct scatterlist sg;

        sg_init_one(&sg, pdu, len);
        ahash_request_set_crypt(hash, &sg, pdu + len, len);
        crypto_ahash_digest(hash);
}

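/*
 * Verify the received header digest: save the digest that arrived after
 * the header, recompute CRC32C over the header (nvmet_tcp_hdgst() writes
 * its result right after the header, overwriting the received value), and
 * compare the two.
 */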
static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue,
                void *pdu, size_t len)
{
        struct nvme_tcp_hdr *hdr = pdu;
        __le32 recv_digest;
        __le32 exp_digest;

        if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
                pr_err("queue %d: header digest enabled but no header digest\n",
                        queue->idx);
                return -EPROTO;
        }

        recv_digest = *(__le32 *)(pdu + hdr->hlen);
        nvmet_tcp_hdgst(queue->rcv_hash, pdu, len);
        exp_digest = *(__le32 *)(pdu + hdr->hlen);
        if (recv_digest != exp_digest) {
                pr_err("queue %d: header digest error: recv %#x expected %#x\n",
                        queue->idx, le32_to_cpu(recv_digest),
                        le32_to_cpu(exp_digest));
                return -EPROTO;
        }

        return 0;
}

static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
{
        struct nvme_tcp_hdr *hdr = pdu;
        u8 digest_len = nvmet_tcp_hdgst_len(queue);
        u32 len;

        len = le32_to_cpu(hdr->plen) - hdr->hlen -
                (hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0);

        if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
                pr_err("queue %d: data digest flag is cleared\n", queue->idx);
                return -EPROTO;
        }

        return 0;
}

static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
        struct scatterlist *sg;
        int i;

        sg = &cmd->req.sg[cmd->sg_idx];

        for (i = 0; i < cmd->nr_mapped; i++)
                kunmap(sg_page(&sg[i]));
}

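/*
 * Build a kvec over the command's scatterlist pages covering pdu_len bytes
 * starting at the current receive offset (rbytes_done), kmap()ing each
 * page, and prime recv_msg's iterator so sock_recvmsg() can copy H2C data
 * straight into the request buffers.
 */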
static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
        struct kvec *iov = cmd->iov;
        struct scatterlist *sg;
        u32 length, offset, sg_offset;

        length = cmd->pdu_len;
        cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
        offset = cmd->rbytes_done;
        cmd->sg_idx = offset / PAGE_SIZE;
        sg_offset = offset % PAGE_SIZE;
        sg = &cmd->req.sg[cmd->sg_idx];

        while (length) {
                u32 iov_len = min_t(u32, length, sg->length - sg_offset);

                iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
                iov->iov_len = iov_len;

                length -= iov_len;
                sg = sg_next(sg);
                iov++;
                sg_offset = 0;
        }

        iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
                cmd->nr_mapped, cmd->pdu_len);
}

static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
{
        queue->rcv_state = NVMET_TCP_RECV_ERR;
        if (queue->nvme_sq.ctrl)
                nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
        else
                kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}

static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
{
        if (status == -EPIPE || status == -ECONNRESET)
                kernel_sock_shutdown(queue->sock, SHUT_RDWR);
        else
                nvmet_tcp_fatal_error(queue);
}

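/*
 * Parse the command's SGL descriptor and allocate the data buffers. An
 * offset (in-capsule) data block descriptor is only legal for writes and
 * must fit within the port's advertised inline data size; in that case
 * pdu_len records how much inline data to expect. For writes with data to
 * receive we also allocate the kvec array used by the receive path.
 */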
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
        struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
        u32 len = le32_to_cpu(sgl->length);

        if (!len)
                return 0;

        if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
                          NVME_SGL_FMT_OFFSET)) {
                if (!nvme_is_write(cmd->req.cmd))
                        return NVME_SC_INVALID_FIELD | NVME_SC_DNR;

                if (len > cmd->req.port->inline_data_size)
                        return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
                cmd->pdu_len = len;
        }
        cmd->req.transfer_len += len;

        cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt);
        if (!cmd->req.sg)
                return NVME_SC_INTERNAL;
        cmd->cur_sg = cmd->req.sg;

        if (nvmet_tcp_has_data_in(cmd)) {
                cmd->iov = kmalloc_array(cmd->req.sg_cnt,
                                sizeof(*cmd->iov), GFP_KERNEL);
                if (!cmd->iov)
                        goto err;
        }

        return 0;
err:
        sgl_free(cmd->req.sg);
        return NVME_SC_INTERNAL;
}

static void nvmet_tcp_send_ddgst(struct ahash_request *hash,
                struct nvmet_tcp_cmd *cmd)
{
        ahash_request_set_crypt(hash, cmd->req.sg,
                (void *)&cmd->exp_ddgst, cmd->req.transfer_len);
        crypto_ahash_digest(hash);
}

static void nvmet_tcp_recv_ddgst(struct ahash_request *hash,
                struct nvmet_tcp_cmd *cmd)
{
        struct scatterlist sg;
        struct kvec *iov;
        int i;

        crypto_ahash_init(hash);
        for (i = 0, iov = cmd->iov; i < cmd->nr_mapped; i++, iov++) {
                sg_init_one(&sg, iov->iov_base, iov->iov_len);
                ahash_request_set_crypt(hash, &sg, NULL, iov->iov_len);
                crypto_ahash_update(hash);
        }
        ahash_request_set_crypt(hash, NULL, (void *)&cmd->exp_ddgst, 0);
        crypto_ahash_final(hash);
}

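/*
 * Prepare a C2H_DATA PDU for a read completion. When the host disabled SQ
 * head pointer updates (sqhd_disabled), the DATA_SUCCESS flag lets us
 * elide the response capsule entirely. Data and header digests, when
 * negotiated, are computed here before transmission starts.
 */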
static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
{
        struct nvme_tcp_data_pdu *pdu = cmd->data_pdu;
        struct nvmet_tcp_queue *queue = cmd->queue;
        u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
        u8 ddgst = nvmet_tcp_ddgst_len(cmd->queue);

        cmd->offset = 0;
        cmd->state = NVMET_TCP_SEND_DATA_PDU;

        pdu->hdr.type = nvme_tcp_c2h_data;
        pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ?
                                                NVME_TCP_F_DATA_SUCCESS : 0);
        pdu->hdr.hlen = sizeof(*pdu);
        pdu->hdr.pdo = pdu->hdr.hlen + hdgst;
        pdu->hdr.plen =
                cpu_to_le32(pdu->hdr.hlen + hdgst +
                                cmd->req.transfer_len + ddgst);
        pdu->command_id = cmd->req.cqe->command_id;
        pdu->data_length = cpu_to_le32(cmd->req.transfer_len);
        pdu->data_offset = cpu_to_le32(cmd->wbytes_done);

        if (queue->data_digest) {
                pdu->hdr.flags |= NVME_TCP_F_DDGST;
                nvmet_tcp_send_ddgst(queue->snd_hash, cmd);
        }

        if (cmd->queue->hdr_digest) {
                pdu->hdr.flags |= NVME_TCP_F_HDGST;
                nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
        }
}

static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd)
{
        struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu;
        struct nvmet_tcp_queue *queue = cmd->queue;
        u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);

        cmd->offset = 0;
        cmd->state = NVMET_TCP_SEND_R2T;

        pdu->hdr.type = nvme_tcp_r2t;
        pdu->hdr.flags = 0;
        pdu->hdr.hlen = sizeof(*pdu);
        pdu->hdr.pdo = 0;
        pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);

        pdu->command_id = cmd->req.cmd->common.command_id;
        pdu->ttag = nvmet_tcp_cmd_tag(cmd->queue, cmd);
        pdu->r2t_length = cpu_to_le32(cmd->req.transfer_len - cmd->rbytes_done);
        pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done);
        if (cmd->queue->hdr_digest) {
                pdu->hdr.flags |= NVME_TCP_F_HDGST;
                nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
        }
}

static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
{
        struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu;
        struct nvmet_tcp_queue *queue = cmd->queue;
        u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);

        cmd->offset = 0;
        cmd->state = NVMET_TCP_SEND_RESPONSE;

        pdu->hdr.type = nvme_tcp_rsp;
        pdu->hdr.flags = 0;
        pdu->hdr.hlen = sizeof(*pdu);
        pdu->hdr.pdo = 0;
        pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
        if (cmd->queue->hdr_digest) {
                pdu->hdr.flags |= NVME_TCP_F_HDGST;
                nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
        }
}

static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
{
        struct llist_node *node;
        struct nvmet_tcp_cmd *cmd;

        for (node = llist_del_all(&queue->resp_list); node; node = node->next) {
                cmd = llist_entry(node, struct nvmet_tcp_cmd, lentry);
                list_add(&cmd->entry, &queue->resp_send_list);
                queue->send_list_len++;
        }
}

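/*
 * Pick the next command to transmit. The ordered resp_send_list is
 * consumed first; when it runs dry, the lock-free resp_list (filled from
 * completion context) is drained into it. The command's send state is
 * then initialized according to what it needs: C2H data, an R2T, or just
 * a response capsule.
 */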
static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue)
{
        queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list,
                        struct nvmet_tcp_cmd, entry);
        if (!queue->snd_cmd) {
                nvmet_tcp_process_resp_list(queue);
                queue->snd_cmd =
                        list_first_entry_or_null(&queue->resp_send_list,
                                        struct nvmet_tcp_cmd, entry);
                if (unlikely(!queue->snd_cmd))
                        return NULL;
        }

        list_del_init(&queue->snd_cmd->entry);
        queue->send_list_len--;

        if (nvmet_tcp_need_data_out(queue->snd_cmd))
                nvmet_setup_c2h_data_pdu(queue->snd_cmd);
        else if (nvmet_tcp_need_data_in(queue->snd_cmd))
                nvmet_setup_r2t_pdu(queue->snd_cmd);
        else
                nvmet_setup_response_pdu(queue->snd_cmd);

        return queue->snd_cmd;
}

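/*
 * Queue a completed request for transmission. If the command being
 * completed is the one currently being parsed and it carries inline write
 * data that has not been received yet, defer queueing: the receive path
 * will queue (or execute) it once the in-capsule data has fully arrived.
 */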
static void nvmet_tcp_queue_response(struct nvmet_req *req)
{
        struct nvmet_tcp_cmd *cmd =
                container_of(req, struct nvmet_tcp_cmd, req);
        struct nvmet_tcp_queue *queue = cmd->queue;
        struct nvme_sgl_desc *sgl;
        u32 len;

        if (unlikely(cmd == queue->cmd)) {
                sgl = &cmd->req.cmd->common.dptr.sgl;
                len = le32_to_cpu(sgl->length);

                /*
                 * Wait for inline data before processing the response.
                 * Avoid using helpers, this might happen before
                 * nvmet_req_init is completed.
                 */
                if (queue->rcv_state == NVMET_TCP_RECV_PDU &&
                    len && len <= cmd->req.port->inline_data_size &&
                    nvme_is_write(cmd->req.cmd))
                        return;
        }

        llist_add(&cmd->lentry, &queue->resp_list);
        queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
}

static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
{
        if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED))
                nvmet_tcp_queue_response(&cmd->req);
        else
                cmd->req.execute(&cmd->req);
}

static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
{
        u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
        int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst;
        int ret;

        ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
                offset_in_page(cmd->data_pdu) + cmd->offset,
                left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
        if (ret <= 0)
                return ret;

        cmd->offset += ret;
        left -= ret;

        if (left)
                return -EAGAIN;

        cmd->state = NVMET_TCP_SEND_DATA;
        cmd->offset = 0;
        return 1;
}

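/*
 * Stream the payload pages with kernel_sendpage(), corking the socket
 * (MSG_MORE | MSG_SENDPAGE_NOTLAST) whenever more bytes of this command,
 * a data digest, a response capsule, or further queued commands will
 * follow. A short send leaves cur_sg/offset positioned for the next call.
 */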
static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
        struct nvmet_tcp_queue *queue = cmd->queue;
        int ret;

        while (cmd->cur_sg) {
                struct page *page = sg_page(cmd->cur_sg);
                u32 left = cmd->cur_sg->length - cmd->offset;
                int flags = MSG_DONTWAIT;

                if ((!last_in_batch && cmd->queue->send_list_len) ||
                    cmd->wbytes_done + left < cmd->req.transfer_len ||
                    queue->data_digest || !queue->nvme_sq.sqhd_disabled)
                        flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;

                ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
                                        left, flags);
                if (ret <= 0)
                        return ret;

                cmd->offset += ret;
                cmd->wbytes_done += ret;

                /* Done with sg? */
                if (cmd->offset == cmd->cur_sg->length) {
                        cmd->cur_sg = sg_next(cmd->cur_sg);
                        cmd->offset = 0;
                }
        }

        if (queue->data_digest) {
                cmd->state = NVMET_TCP_SEND_DDGST;
                cmd->offset = 0;
        } else {
                if (queue->nvme_sq.sqhd_disabled) {
                        cmd->queue->snd_cmd = NULL;
                        nvmet_tcp_put_cmd(cmd);
                } else {
                        nvmet_setup_response_pdu(cmd);
                }
        }

        if (queue->nvme_sq.sqhd_disabled) {
                kfree(cmd->iov);
                sgl_free(cmd->req.sg);
        }

        return 1;
}

static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
                bool last_in_batch)
{
        u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
        int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst;
        int flags = MSG_DONTWAIT;
        int ret;

        if (!last_in_batch && cmd->queue->send_list_len)
                flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
        else
                flags |= MSG_EOR;

        ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu),
                offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags);
        if (ret <= 0)
                return ret;
        cmd->offset += ret;
        left -= ret;

        if (left)
                return -EAGAIN;

        kfree(cmd->iov);
        sgl_free(cmd->req.sg);
        cmd->queue->snd_cmd = NULL;
        nvmet_tcp_put_cmd(cmd);
        return 1;
}

static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
        u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
        int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst;
        int flags = MSG_DONTWAIT;
        int ret;

        if (!last_in_batch && cmd->queue->send_list_len)
                flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
        else
                flags |= MSG_EOR;

        ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu),
                offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags);
        if (ret <= 0)
                return ret;
        cmd->offset += ret;
        left -= ret;

        if (left)
                return -EAGAIN;

        cmd->queue->snd_cmd = NULL;
        return 1;
}

static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
        struct nvmet_tcp_queue *queue = cmd->queue;
        int left = NVME_TCP_DIGEST_LENGTH - cmd->offset;
        struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
        struct kvec iov = {
                .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
                .iov_len = left
        };
        int ret;

        if (!last_in_batch && cmd->queue->send_list_len)
                msg.msg_flags |= MSG_MORE;
        else
                msg.msg_flags |= MSG_EOR;

        ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
        if (unlikely(ret <= 0))
                return ret;

        cmd->offset += ret;
        left -= ret;

        if (left)
                return -EAGAIN;

        if (queue->nvme_sq.sqhd_disabled) {
                cmd->queue->snd_cmd = NULL;
                nvmet_tcp_put_cmd(cmd);
        } else {
                nvmet_setup_response_pdu(cmd);
        }
        return 1;
}

static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
                bool last_in_batch)
{
        struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
        int ret = 0;

        if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
                cmd = nvmet_tcp_fetch_cmd(queue);
                if (unlikely(!cmd))
                        return 0;
        }

        if (cmd->state == NVMET_TCP_SEND_DATA_PDU) {
                ret = nvmet_try_send_data_pdu(cmd);
                if (ret <= 0)
                        goto done_send;
        }

        if (cmd->state == NVMET_TCP_SEND_DATA) {
                ret = nvmet_try_send_data(cmd, last_in_batch);
                if (ret <= 0)
                        goto done_send;
        }

        if (cmd->state == NVMET_TCP_SEND_DDGST) {
                ret = nvmet_try_send_ddgst(cmd, last_in_batch);
                if (ret <= 0)
                        goto done_send;
        }

        if (cmd->state == NVMET_TCP_SEND_R2T) {
                ret = nvmet_try_send_r2t(cmd, last_in_batch);
                if (ret <= 0)
                        goto done_send;
        }

        if (cmd->state == NVMET_TCP_SEND_RESPONSE)
                ret = nvmet_try_send_response(cmd, last_in_batch);

done_send:
        if (ret < 0) {
                if (ret == -EAGAIN)
                        return 0;
                return ret;
        }

        return 1;
}

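/*
 * Send up to @budget PDU fragments, counting successful operations in
 * @sends so the caller can tell whether more work may be pending. Socket
 * errors tear the queue down via nvmet_tcp_socket_error().
 */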
static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
                int budget, int *sends)
{
        int i, ret = 0;

        for (i = 0; i < budget; i++) {
                ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
                if (unlikely(ret < 0)) {
                        nvmet_tcp_socket_error(queue, ret);
                        goto done;
                } else if (ret == 0) {
                        break;
                }
                (*sends)++;
        }
done:
        return ret;
}

static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue)
{
        queue->offset = 0;
        queue->left = sizeof(struct nvme_tcp_hdr);
        queue->cmd = NULL;
        queue->rcv_state = NVMET_TCP_RECV_PDU;
}

static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue)
{
        struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);

        ahash_request_free(queue->rcv_hash);
        ahash_request_free(queue->snd_hash);
        crypto_free_ahash(tfm);
}

static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue)
{
        struct crypto_ahash *tfm;

        tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
        if (!queue->snd_hash)
                goto free_tfm;
        ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);

        queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
        if (!queue->rcv_hash)
                goto free_snd_hash;
        ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);

        return 0;
free_snd_hash:
        ahash_request_free(queue->snd_hash);
free_tfm:
        crypto_free_ahash(tfm);
        return -ENOMEM;
}

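/*
 * Handle the ICReq PDU that starts every NVMe/TCP connection: validate
 * the PDU length, PFV, and host PDU alignment (hpda), record the
 * negotiated digests and allocate their crypto contexts, then answer with
 * an ICResp and move the queue to LIVE.
 */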
static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
{
        struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq;
        struct nvme_tcp_icresp_pdu *icresp = &queue->pdu.icresp;
        struct msghdr msg = {};
        struct kvec iov;
        int ret;

        if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
                pr_err("bad nvme-tcp pdu length (%d)\n",
                        le32_to_cpu(icreq->hdr.plen));
                nvmet_tcp_fatal_error(queue);
                return -EPROTO;
        }

        if (icreq->pfv != NVME_TCP_PFV_1_0) {
                pr_err("queue %d: bad pfv %d\n", queue->idx, icreq->pfv);
                return -EPROTO;
        }

        if (icreq->hpda != 0) {
                pr_err("queue %d: unsupported hpda %d\n", queue->idx,
                        icreq->hpda);
                return -EPROTO;
        }

        queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE);
        queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE);
        if (queue->hdr_digest || queue->data_digest) {
                ret = nvmet_tcp_alloc_crypto(queue);
                if (ret)
                        return ret;
        }

        memset(icresp, 0, sizeof(*icresp));
        icresp->hdr.type = nvme_tcp_icresp;
        icresp->hdr.hlen = sizeof(*icresp);
        icresp->hdr.pdo = 0;
        icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
        icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
        icresp->maxdata = cpu_to_le32(0x400000); /* 4M arbitrary limit */
        icresp->cpda = 0;
        if (queue->hdr_digest)
                icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
        if (queue->data_digest)
                icresp->digest |= NVME_TCP_DATA_DIGEST_ENABLE;

        iov.iov_base = icresp;
        iov.iov_len = sizeof(*icresp);
        ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
        if (ret < 0)
                goto free_crypto;

        queue->state = NVMET_TCP_Q_LIVE;
        nvmet_prepare_receive_pdu(queue);
        return 0;
free_crypto:
        if (queue->hdr_digest || queue->data_digest)
                nvmet_tcp_free_crypto(queue);
        return ret;
}

static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
                struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
{
        size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
        int ret;

        if (!nvme_is_write(cmd->req.cmd) ||
            data_len > cmd->req.port->inline_data_size) {
                nvmet_prepare_receive_pdu(queue);
                return;
        }

        ret = nvmet_tcp_map_data(cmd);
        if (unlikely(ret)) {
                pr_err("queue %d: failed to map data\n", queue->idx);
                nvmet_tcp_fatal_error(queue);
                return;
        }

        queue->rcv_state = NVMET_TCP_RECV_DATA;
        nvmet_tcp_map_pdu_iovec(cmd);
        cmd->flags |= NVMET_TCP_F_INIT_FAILED;
}

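/*
 * Handle an H2C_DATA PDU: look the command up by its transfer tag
 * (bounds-checked against nr_cmds), make sure the host's data offset
 * matches how much we have already received, and switch the queue into
 * data receive state for this command.
 */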
static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
{
        struct nvme_tcp_data_pdu *data = &queue->pdu.data;
        struct nvmet_tcp_cmd *cmd;

        if (likely(queue->nr_cmds)) {
                if (unlikely(data->ttag >= queue->nr_cmds)) {
                        pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n",
                                queue->idx, data->ttag, queue->nr_cmds);
                        nvmet_tcp_fatal_error(queue);
                        return -EPROTO;
                }
                cmd = &queue->cmds[data->ttag];
        } else {
                cmd = &queue->connect;
        }

        if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) {
                pr_err("ttag %u unexpected data offset %u (expected %u)\n",
                        data->ttag, le32_to_cpu(data->data_offset),
                        cmd->rbytes_done);
                /* FIXME: use path and transport errors */
                nvmet_req_complete(&cmd->req,
                        NVME_SC_INVALID_FIELD | NVME_SC_DNR);
                return -EPROTO;
        }

        cmd->pdu_len = le32_to_cpu(data->data_length);
        cmd->pdu_recv = 0;
        nvmet_tcp_map_pdu_iovec(cmd);
        queue->cmd = cmd;
        queue->rcv_state = NVMET_TCP_RECV_DATA;

        return 0;
}

static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
{
        struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
        struct nvme_command *nvme_cmd = &queue->pdu.cmd.cmd;
        struct nvmet_req *req;
        int ret;

        if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
                if (hdr->type != nvme_tcp_icreq) {
                        pr_err("unexpected pdu type (%d) before icreq\n",
                                hdr->type);
                        nvmet_tcp_fatal_error(queue);
                        return -EPROTO;
                }
                return nvmet_tcp_handle_icreq(queue);
        }

        if (hdr->type == nvme_tcp_h2c_data) {
                ret = nvmet_tcp_handle_h2c_data_pdu(queue);
                if (unlikely(ret))
                        return ret;
                return 0;
        }

        queue->cmd = nvmet_tcp_get_cmd(queue);
        if (unlikely(!queue->cmd)) {
                /* This should never happen */
                pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d\n",
                        queue->idx, queue->nr_cmds, queue->send_list_len,
                        nvme_cmd->common.opcode);
                nvmet_tcp_fatal_error(queue);
                return -ENOMEM;
        }

        req = &queue->cmd->req;
        memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd));

        if (unlikely(!nvmet_req_init(req, &queue->nvme_cq,
                        &queue->nvme_sq, &nvmet_tcp_ops))) {
                pr_err("failed cmd %p id %d opcode %d, data_len: %d\n",
                        req->cmd, req->cmd->common.command_id,
                        req->cmd->common.opcode,
                        le32_to_cpu(req->cmd->common.dptr.sgl.length));

                nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
                return 0;
        }

        ret = nvmet_tcp_map_data(queue->cmd);
        if (unlikely(ret)) {
                pr_err("queue %d: failed to map data\n", queue->idx);
                if (nvmet_tcp_has_inline_data(queue->cmd))
                        nvmet_tcp_fatal_error(queue);
                else
                        nvmet_req_complete(req, ret);
                ret = -EAGAIN;
                goto out;
        }

        if (nvmet_tcp_need_data_in(queue->cmd)) {
                if (nvmet_tcp_has_inline_data(queue->cmd)) {
                        queue->rcv_state = NVMET_TCP_RECV_DATA;
                        nvmet_tcp_map_pdu_iovec(queue->cmd);
                        return 0;
                }
                /* send back R2T */
                nvmet_tcp_queue_response(&queue->cmd->req);
                goto out;
        }

        queue->cmd->req.execute(&queue->cmd->req);
out:
        nvmet_prepare_receive_pdu(queue);
        return ret;
}

static const u8 nvme_tcp_pdu_sizes[] = {
        [nvme_tcp_icreq]        = sizeof(struct nvme_tcp_icreq_pdu),
        [nvme_tcp_cmd]          = sizeof(struct nvme_tcp_cmd_pdu),
        [nvme_tcp_h2c_data]     = sizeof(struct nvme_tcp_data_pdu),
};

static inline u8 nvmet_tcp_pdu_size(u8 type)
{
        size_t idx = type;

        return (idx < ARRAY_SIZE(nvme_tcp_pdu_sizes) &&
                nvme_tcp_pdu_sizes[idx]) ?
                        nvme_tcp_pdu_sizes[idx] : 0;
}

static inline bool nvmet_tcp_pdu_valid(u8 type)
{
        switch (type) {
        case nvme_tcp_icreq:
        case nvme_tcp_cmd:
        case nvme_tcp_h2c_data:
                /* fallthru */
                return true;
        }

        return false;
}

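/*
 * Receive a PDU in two steps: first the common header, then, once hlen is
 * known and the type validated, the remainder of the PDU (plus the header
 * digest if negotiated). Digests are verified before the PDU is
 * dispatched to nvmet_tcp_done_recv_pdu().
 */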
nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue * queue)1060*4882a593Smuzhiyun static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
1061*4882a593Smuzhiyun {
1062*4882a593Smuzhiyun struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
1063*4882a593Smuzhiyun int len;
1064*4882a593Smuzhiyun struct kvec iov;
1065*4882a593Smuzhiyun struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
1066*4882a593Smuzhiyun
1067*4882a593Smuzhiyun recv:
1068*4882a593Smuzhiyun iov.iov_base = (void *)&queue->pdu + queue->offset;
1069*4882a593Smuzhiyun iov.iov_len = queue->left;
1070*4882a593Smuzhiyun len = kernel_recvmsg(queue->sock, &msg, &iov, 1,
1071*4882a593Smuzhiyun iov.iov_len, msg.msg_flags);
1072*4882a593Smuzhiyun if (unlikely(len < 0))
1073*4882a593Smuzhiyun return len;
1074*4882a593Smuzhiyun
1075*4882a593Smuzhiyun queue->offset += len;
1076*4882a593Smuzhiyun queue->left -= len;
1077*4882a593Smuzhiyun if (queue->left)
1078*4882a593Smuzhiyun return -EAGAIN;
1079*4882a593Smuzhiyun
1080*4882a593Smuzhiyun if (queue->offset == sizeof(struct nvme_tcp_hdr)) {
1081*4882a593Smuzhiyun u8 hdgst = nvmet_tcp_hdgst_len(queue);
1082*4882a593Smuzhiyun
1083*4882a593Smuzhiyun if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
1084*4882a593Smuzhiyun pr_err("unexpected pdu type %d\n", hdr->type);
1085*4882a593Smuzhiyun nvmet_tcp_fatal_error(queue);
1086*4882a593Smuzhiyun return -EIO;
1087*4882a593Smuzhiyun }
1088*4882a593Smuzhiyun
1089*4882a593Smuzhiyun if (unlikely(hdr->hlen != nvmet_tcp_pdu_size(hdr->type))) {
1090*4882a593Smuzhiyun pr_err("pdu %d bad hlen %d\n", hdr->type, hdr->hlen);
1091*4882a593Smuzhiyun return -EIO;
1092*4882a593Smuzhiyun }
1093*4882a593Smuzhiyun
1094*4882a593Smuzhiyun queue->left = hdr->hlen - queue->offset + hdgst;
1095*4882a593Smuzhiyun goto recv;
1096*4882a593Smuzhiyun }
1097*4882a593Smuzhiyun
1098*4882a593Smuzhiyun if (queue->hdr_digest &&
1099*4882a593Smuzhiyun nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
1100*4882a593Smuzhiyun nvmet_tcp_fatal_error(queue); /* fatal */
1101*4882a593Smuzhiyun return -EPROTO;
1102*4882a593Smuzhiyun }
1103*4882a593Smuzhiyun
1104*4882a593Smuzhiyun if (queue->data_digest &&
1105*4882a593Smuzhiyun nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
1106*4882a593Smuzhiyun nvmet_tcp_fatal_error(queue); /* fatal */
1107*4882a593Smuzhiyun return -EPROTO;
1108*4882a593Smuzhiyun }
1109*4882a593Smuzhiyun
1110*4882a593Smuzhiyun return nvmet_tcp_done_recv_pdu(queue);
1111*4882a593Smuzhiyun }
1112*4882a593Smuzhiyun
nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd * cmd)1113*4882a593Smuzhiyun static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd)
1114*4882a593Smuzhiyun {
1115*4882a593Smuzhiyun struct nvmet_tcp_queue *queue = cmd->queue;
1116*4882a593Smuzhiyun
1117*4882a593Smuzhiyun nvmet_tcp_recv_ddgst(queue->rcv_hash, cmd);
1118*4882a593Smuzhiyun queue->offset = 0;
1119*4882a593Smuzhiyun queue->left = NVME_TCP_DIGEST_LENGTH;
1120*4882a593Smuzhiyun queue->rcv_state = NVMET_TCP_RECV_DDGST;
1121*4882a593Smuzhiyun }
1122*4882a593Smuzhiyun
nvmet_tcp_try_recv_data(struct nvmet_tcp_queue * queue)1123*4882a593Smuzhiyun static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
1124*4882a593Smuzhiyun {
1125*4882a593Smuzhiyun struct nvmet_tcp_cmd *cmd = queue->cmd;
1126*4882a593Smuzhiyun int ret;
1127*4882a593Smuzhiyun
1128*4882a593Smuzhiyun while (msg_data_left(&cmd->recv_msg)) {
1129*4882a593Smuzhiyun ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg,
1130*4882a593Smuzhiyun cmd->recv_msg.msg_flags);
1131*4882a593Smuzhiyun if (ret <= 0)
1132*4882a593Smuzhiyun return ret;
1133*4882a593Smuzhiyun
1134*4882a593Smuzhiyun cmd->pdu_recv += ret;
1135*4882a593Smuzhiyun cmd->rbytes_done += ret;
1136*4882a593Smuzhiyun }
1137*4882a593Smuzhiyun
1138*4882a593Smuzhiyun nvmet_tcp_unmap_pdu_iovec(cmd);
1139*4882a593Smuzhiyun if (queue->data_digest) {
1140*4882a593Smuzhiyun nvmet_tcp_prep_recv_ddgst(cmd);
1141*4882a593Smuzhiyun return 0;
1142*4882a593Smuzhiyun }
1143*4882a593Smuzhiyun
1144*4882a593Smuzhiyun if (cmd->rbytes_done == cmd->req.transfer_len)
1145*4882a593Smuzhiyun nvmet_tcp_execute_request(cmd);
1146*4882a593Smuzhiyun
1147*4882a593Smuzhiyun nvmet_prepare_receive_pdu(queue);
1148*4882a593Smuzhiyun return 0;
1149*4882a593Smuzhiyun }
1150*4882a593Smuzhiyun
nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue * queue)1151*4882a593Smuzhiyun static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
1152*4882a593Smuzhiyun {
1153*4882a593Smuzhiyun struct nvmet_tcp_cmd *cmd = queue->cmd;
1154*4882a593Smuzhiyun int ret;
1155*4882a593Smuzhiyun struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
1156*4882a593Smuzhiyun struct kvec iov = {
1157*4882a593Smuzhiyun .iov_base = (void *)&cmd->recv_ddgst + queue->offset,
1158*4882a593Smuzhiyun .iov_len = queue->left
1159*4882a593Smuzhiyun };
1160*4882a593Smuzhiyun
1161*4882a593Smuzhiyun ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
1162*4882a593Smuzhiyun iov.iov_len, msg.msg_flags);
1163*4882a593Smuzhiyun if (unlikely(ret < 0))
1164*4882a593Smuzhiyun return ret;
1165*4882a593Smuzhiyun
1166*4882a593Smuzhiyun queue->offset += ret;
1167*4882a593Smuzhiyun queue->left -= ret;
1168*4882a593Smuzhiyun if (queue->left)
1169*4882a593Smuzhiyun return -EAGAIN;
1170*4882a593Smuzhiyun
1171*4882a593Smuzhiyun if (queue->data_digest && cmd->exp_ddgst != cmd->recv_ddgst) {
1172*4882a593Smuzhiyun pr_err("queue %d: cmd %d pdu (%d) data digest error: recv %#x expected %#x\n",
1173*4882a593Smuzhiyun queue->idx, cmd->req.cmd->common.command_id,
1174*4882a593Smuzhiyun queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst),
1175*4882a593Smuzhiyun le32_to_cpu(cmd->exp_ddgst));
1176*4882a593Smuzhiyun nvmet_tcp_finish_cmd(cmd);
1177*4882a593Smuzhiyun nvmet_tcp_fatal_error(queue);
1178*4882a593Smuzhiyun ret = -EPROTO;
1179*4882a593Smuzhiyun goto out;
1180*4882a593Smuzhiyun }
1181*4882a593Smuzhiyun
1182*4882a593Smuzhiyun if (cmd->rbytes_done == cmd->req.transfer_len)
1183*4882a593Smuzhiyun nvmet_tcp_execute_request(cmd);
1184*4882a593Smuzhiyun
1185*4882a593Smuzhiyun ret = 0;
1186*4882a593Smuzhiyun out:
1187*4882a593Smuzhiyun nvmet_prepare_receive_pdu(queue);
1188*4882a593Smuzhiyun return ret;
1189*4882a593Smuzhiyun }
1190*4882a593Smuzhiyun
static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue *queue)
{
	int result = 0;

	if (unlikely(queue->rcv_state == NVMET_TCP_RECV_ERR))
		return 0;

	if (queue->rcv_state == NVMET_TCP_RECV_PDU) {
		result = nvmet_tcp_try_recv_pdu(queue);
		if (result != 0)
			goto done_recv;
	}

	if (queue->rcv_state == NVMET_TCP_RECV_DATA) {
		result = nvmet_tcp_try_recv_data(queue);
		if (result != 0)
			goto done_recv;
	}

	if (queue->rcv_state == NVMET_TCP_RECV_DDGST) {
		result = nvmet_tcp_try_recv_ddgst(queue);
		if (result != 0)
			goto done_recv;
	}

done_recv:
	if (result < 0) {
		if (result == -EAGAIN)
			return 0;
		return result;
	}
	return 1;
}

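/*
 * Receive up to @budget steps from the socket, counting each successful
 * step in @recvs. Socket errors are reported to the queue and propagated
 * to the caller.
 */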
static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
		int budget, int *recvs)
{
	int i, ret = 0;

	for (i = 0; i < budget; i++) {
		ret = nvmet_tcp_try_recv_one(queue);
		if (unlikely(ret < 0)) {
			nvmet_tcp_socket_error(queue, ret);
			goto done;
		} else if (ret == 0) {
			break;
		}
		(*recvs)++;
	}
done:
	return ret;
}

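/*
 * Transition the queue to DISCONNECTING exactly once and kick the
 * release work; the state_lock guards against scheduling the teardown
 * twice.
 */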
static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
{
	spin_lock(&queue->state_lock);
	if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
		queue->state = NVMET_TCP_Q_DISCONNECTING;
		schedule_work(&queue->release_work);
	}
	spin_unlock(&queue->state_lock);
}

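/*
 * Main per-queue I/O worker: alternate receive and send passes until
 * neither side makes progress or the overall budget
 * (NVMET_TCP_IO_WORK_BUDGET) is spent, then requeue itself if work
 * remains, bounding the time spent per invocation.
 */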
static void nvmet_tcp_io_work(struct work_struct *w)
{
	struct nvmet_tcp_queue *queue =
		container_of(w, struct nvmet_tcp_queue, io_work);
	bool pending;
	int ret, ops = 0;

	do {
		pending = false;

		ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
		if (ret > 0)
			pending = true;
		else if (ret < 0)
			return;

		ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
		if (ret > 0)
			pending = true;
		else if (ret < 0)
			return;

	} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);

	/*
	 * We exhausted our budget, requeue ourselves
	 */
	if (pending)
		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}

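/*
 * Allocate the per-command PDU buffers (command, response, data, R2T)
 * from the queue's page-frag cache, each with room for a trailing header
 * digest, and put the command on the queue's free list.
 */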
static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
		struct nvmet_tcp_cmd *c)
{
	u8 hdgst = nvmet_tcp_hdgst_len(queue);

	c->queue = queue;
	c->req.port = queue->port->nport;

	c->cmd_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->cmd_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->cmd_pdu)
		return -ENOMEM;
	c->req.cmd = &c->cmd_pdu->cmd;

	c->rsp_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->rsp_pdu)
		goto out_free_cmd;
	c->req.cqe = &c->rsp_pdu->cqe;

	c->data_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->data_pdu)
		goto out_free_rsp;

	c->r2t_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->r2t_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->r2t_pdu)
		goto out_free_data;

	c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;

	list_add_tail(&c->entry, &queue->free_list);

	return 0;
out_free_data:
	page_frag_free(c->data_pdu);
out_free_rsp:
	page_frag_free(c->rsp_pdu);
out_free_cmd:
	page_frag_free(c->cmd_pdu);
	return -ENOMEM;
}

static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c)
{
	page_frag_free(c->r2t_pdu);
	page_frag_free(c->data_pdu);
	page_frag_free(c->rsp_pdu);
	page_frag_free(c->cmd_pdu);
}

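/*
 * Allocate the queue's command array, sized by the caller via
 * queue->nr_cmds. On a mid-array allocation failure, unwind the commands
 * already set up.
 */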
static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmds;
	int i, ret = -EINVAL, nr_cmds = queue->nr_cmds;

	cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL);
	if (!cmds)
		goto out;

	for (i = 0; i < nr_cmds; i++) {
		ret = nvmet_tcp_alloc_cmd(queue, cmds + i);
		if (ret)
			goto out_free;
	}

	queue->cmds = cmds;

	return 0;
out_free:
	while (--i >= 0)
		nvmet_tcp_free_cmd(cmds + i);
	kfree(cmds);
out:
	return ret;
}

static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmds = queue->cmds;
	int i;

	for (i = 0; i < queue->nr_cmds; i++)
		nvmet_tcp_free_cmd(cmds + i);

	nvmet_tcp_free_cmd(&queue->connect);
	kfree(cmds);
}

static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
{
	struct socket *sock = queue->sock;

	write_lock_bh(&sock->sk->sk_callback_lock);
	sock->sk->sk_data_ready = queue->data_ready;
	sock->sk->sk_state_change = queue->state_change;
	sock->sk->sk_write_space = queue->write_space;
	sock->sk->sk_user_data = NULL;
	write_unlock_bh(&sock->sk->sk_callback_lock);
}

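/*
 * Release the resources still held by a command: uninitialize the nvmet
 * request, drop any mapped receive iovec, and free the iovec array and
 * scatterlist.
 */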
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{
	nvmet_req_uninit(&cmd->req);
	nvmet_tcp_unmap_pdu_iovec(cmd);
	kfree(cmd->iov);
	sgl_free(cmd->req.sg);
}

static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmd = queue->cmds;
	int i;

	for (i = 0; i < queue->nr_cmds; i++, cmd++) {
		if (nvmet_tcp_need_data_in(cmd))
			nvmet_tcp_finish_cmd(cmd);
	}

	if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
		/* failed in connect */
		nvmet_tcp_finish_cmd(&queue->connect);
	}
}

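/*
 * Queue teardown, run from the release work item: unlink the queue,
 * restore the original socket callbacks so no new work is scheduled,
 * drain in-flight I/O work, then free the commands, crypto state, the
 * queue index and the page-frag cache before freeing the queue itself.
 */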
static void nvmet_tcp_release_queue_work(struct work_struct *w)
{
	struct page *page;
	struct nvmet_tcp_queue *queue =
		container_of(w, struct nvmet_tcp_queue, release_work);

	mutex_lock(&nvmet_tcp_queue_mutex);
	list_del_init(&queue->queue_list);
	mutex_unlock(&nvmet_tcp_queue_mutex);

	nvmet_tcp_restore_socket_callbacks(queue);
	flush_work(&queue->io_work);

	nvmet_tcp_uninit_data_in_cmds(queue);
	nvmet_sq_destroy(&queue->nvme_sq);
	cancel_work_sync(&queue->io_work);
	sock_release(queue->sock);
	nvmet_tcp_free_cmds(queue);
	if (queue->hdr_digest || queue->data_digest)
		nvmet_tcp_free_crypto(queue);
	ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);

	page = virt_to_head_page(queue->pf_cache.va);
	__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
	kfree(queue);
}

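/*
 * ->sk_data_ready callback: data arrived on the socket, so schedule the
 * queue's io_work on its assigned CPU.
 */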
static void nvmet_tcp_data_ready(struct sock *sk)
{
	struct nvmet_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue))
		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvmet_tcp_write_space(struct sock *sk)
{
	struct nvmet_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (unlikely(!queue))
		goto out;

	if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
		queue->write_space(sk);
		goto out;
	}

	if (sk_stream_is_writeable(sk)) {
		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
	}
out:
	read_unlock_bh(&sk->sk_callback_lock);
}

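/*
 * ->sk_state_change callback: once the peer starts closing the
 * connection, schedule queue release; anything else is unexpected and
 * only logged.
 */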
static void nvmet_tcp_state_change(struct sock *sk)
{
	struct nvmet_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (!queue)
		goto done;

	switch (sk->sk_state) {
	case TCP_FIN_WAIT2:
	case TCP_LAST_ACK:
		break;
	case TCP_FIN_WAIT1:
	case TCP_CLOSE_WAIT:
	case TCP_CLOSE:
		nvmet_tcp_schedule_release_queue(queue);
		break;
	default:
		pr_warn("queue %d unhandled state %d\n",
			queue->idx, sk->sk_state);
	}
done:
	read_unlock_bh(&sk->sk_callback_lock);
}

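/*
 * Fetch the socket's local and peer addresses, disable lingering so a
 * restored network path cannot replay stale transmit-queue data, apply
 * optional priority/TOS settings, and install the nvmet-tcp socket
 * callbacks, but only if the connection is still established.
 */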
static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
{
	struct socket *sock = queue->sock;
	struct inet_sock *inet = inet_sk(sock->sk);
	int ret;

	ret = kernel_getsockname(sock,
		(struct sockaddr *)&queue->sockaddr);
	if (ret < 0)
		return ret;

	ret = kernel_getpeername(sock,
		(struct sockaddr *)&queue->sockaddr_peer);
	if (ret < 0)
		return ret;

	/*
	 * Clean up whatever is sitting in the TCP transmit queue on socket
	 * close. This is done to prevent stale data from being sent should
	 * the network connection be restored before TCP times out.
	 */
	sock_no_linger(sock->sk);

	if (so_priority > 0)
		sock_set_priority(sock->sk, so_priority);

	/* Set socket type of service */
	if (inet->rcv_tos > 0)
		ip_sock_set_tos(sock->sk, inet->rcv_tos);

	ret = 0;
	write_lock_bh(&sock->sk->sk_callback_lock);
	if (sock->sk->sk_state != TCP_ESTABLISHED) {
		/*
		 * If the socket is already closing, don't even start
		 * consuming it
		 */
		ret = -ENOTCONN;
	} else {
		sock->sk->sk_user_data = queue;
		queue->data_ready = sock->sk->sk_data_ready;
		sock->sk->sk_data_ready = nvmet_tcp_data_ready;
		queue->state_change = sock->sk->sk_state_change;
		sock->sk->sk_state_change = nvmet_tcp_state_change;
		queue->write_space = sock->sk->sk_write_space;
		sock->sk->sk_write_space = nvmet_tcp_write_space;
		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
	}
	write_unlock_bh(&sock->sk->sk_callback_lock);

	return ret;
}

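/*
 * Accept-path queue construction: allocate and initialize a queue for a
 * freshly accepted socket, reserve an index, set up the connect command
 * and submission queue, publish the queue on the global list, and hand
 * the socket over to nvmet-tcp. Errors unwind in reverse order.
 */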
static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
		struct socket *newsock)
{
	struct nvmet_tcp_queue *queue;
	int ret;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return -ENOMEM;

	INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
	INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
	queue->sock = newsock;
	queue->port = port;
	queue->nr_cmds = 0;
	spin_lock_init(&queue->state_lock);
	queue->state = NVMET_TCP_Q_CONNECTING;
	INIT_LIST_HEAD(&queue->free_list);
	init_llist_head(&queue->resp_list);
	INIT_LIST_HEAD(&queue->resp_send_list);

	queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL);
	if (queue->idx < 0) {
		ret = queue->idx;
		goto out_free_queue;
	}

	ret = nvmet_tcp_alloc_cmd(queue, &queue->connect);
	if (ret)
		goto out_ida_remove;

	ret = nvmet_sq_init(&queue->nvme_sq);
	if (ret)
		goto out_free_connect;

	nvmet_prepare_receive_pdu(queue);

	mutex_lock(&nvmet_tcp_queue_mutex);
	list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
	mutex_unlock(&nvmet_tcp_queue_mutex);

	ret = nvmet_tcp_set_queue_sock(queue);
	if (ret)
		goto out_destroy_sq;

	return 0;
out_destroy_sq:
	mutex_lock(&nvmet_tcp_queue_mutex);
	list_del_init(&queue->queue_list);
	mutex_unlock(&nvmet_tcp_queue_mutex);
	nvmet_sq_destroy(&queue->nvme_sq);
out_free_connect:
	nvmet_tcp_free_cmd(&queue->connect);
out_ida_remove:
	ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
out_free_queue:
	kfree(queue);
	return ret;
}

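/*
 * Accept work: drain the listening socket of pending connections,
 * allocating a queue for each. Runs until kernel_accept() returns
 * -EAGAIN (no more pending connections) or a real error.
 */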
static void nvmet_tcp_accept_work(struct work_struct *w)
{
	struct nvmet_tcp_port *port =
		container_of(w, struct nvmet_tcp_port, accept_work);
	struct socket *newsock;
	int ret;

	while (true) {
		ret = kernel_accept(port->sock, &newsock, O_NONBLOCK);
		if (ret < 0) {
			if (ret != -EAGAIN)
				pr_warn("failed to accept err=%d\n", ret);
			return;
		}
		ret = nvmet_tcp_alloc_queue(port, newsock);
		if (ret) {
			pr_err("failed to allocate queue\n");
			sock_release(newsock);
		}
	}
}

static void nvmet_tcp_listen_data_ready(struct sock *sk)
{
	struct nvmet_tcp_port *port;

	read_lock_bh(&sk->sk_callback_lock);
	port = sk->sk_user_data;
	if (!port)
		goto out;

	if (sk->sk_state == TCP_LISTEN)
		schedule_work(&port->accept_work);
out:
	read_unlock_bh(&sk->sk_callback_lock);
}

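/*
 * Create a listening socket for an nvmet port: parse the discovery
 * address, create/bind/listen the TCP socket, and hook the listen
 * socket's ->sk_data_ready to schedule the accept work.
 */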
static int nvmet_tcp_add_port(struct nvmet_port *nport)
{
	struct nvmet_tcp_port *port;
	__kernel_sa_family_t af;
	int ret;

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	if (!port)
		return -ENOMEM;

	switch (nport->disc_addr.adrfam) {
	case NVMF_ADDR_FAMILY_IP4:
		af = AF_INET;
		break;
	case NVMF_ADDR_FAMILY_IP6:
		af = AF_INET6;
		break;
	default:
		pr_err("address family %d not supported\n",
				nport->disc_addr.adrfam);
		ret = -EINVAL;
		goto err_port;
	}

	ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
			nport->disc_addr.trsvcid, &port->addr);
	if (ret) {
		pr_err("malformed ip/port passed: %s:%s\n",
			nport->disc_addr.traddr, nport->disc_addr.trsvcid);
		goto err_port;
	}

	port->nport = nport;
	INIT_WORK(&port->accept_work, nvmet_tcp_accept_work);
	if (port->nport->inline_data_size < 0)
		port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE;

	ret = sock_create(port->addr.ss_family, SOCK_STREAM,
				IPPROTO_TCP, &port->sock);
	if (ret) {
		pr_err("failed to create a socket\n");
		goto err_port;
	}

	port->sock->sk->sk_user_data = port;
	port->data_ready = port->sock->sk->sk_data_ready;
	port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
	sock_set_reuseaddr(port->sock->sk);
	tcp_sock_set_nodelay(port->sock->sk);
	if (so_priority > 0)
		sock_set_priority(port->sock->sk, so_priority);

	ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
			sizeof(port->addr));
	if (ret) {
		pr_err("failed to bind port socket %d\n", ret);
		goto err_sock;
	}

	ret = kernel_listen(port->sock, 128);
	if (ret) {
		pr_err("failed to listen %d on port sock\n", ret);
		goto err_sock;
	}

	nport->priv = port;
	pr_info("enabling port %d (%pISpc)\n",
		le16_to_cpu(nport->disc_addr.portid), &port->addr);

	return 0;

err_sock:
	sock_release(port->sock);
err_port:
	kfree(port);
	return ret;
}

static void nvmet_tcp_destroy_port_queues(struct nvmet_tcp_port *port)
{
	struct nvmet_tcp_queue *queue;

	mutex_lock(&nvmet_tcp_queue_mutex);
	list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
		if (queue->port == port)
			kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	mutex_unlock(&nvmet_tcp_queue_mutex);
}

static void nvmet_tcp_remove_port(struct nvmet_port *nport)
{
	struct nvmet_tcp_port *port = nport->priv;

	write_lock_bh(&port->sock->sk->sk_callback_lock);
	port->sock->sk->sk_data_ready = port->data_ready;
	port->sock->sk->sk_user_data = NULL;
	write_unlock_bh(&port->sock->sk->sk_callback_lock);
	cancel_work_sync(&port->accept_work);
	/*
	 * Destroy the remaining queues, which do not belong to any
	 * controller yet.
	 */
	nvmet_tcp_destroy_port_queues(port);

	sock_release(port->sock);
	kfree(port);
}

static void nvmet_tcp_delete_ctrl(struct nvmet_ctrl *ctrl)
{
	struct nvmet_tcp_queue *queue;

	mutex_lock(&nvmet_tcp_queue_mutex);
	list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
		if (queue->nvme_sq.ctrl == ctrl)
			kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	mutex_unlock(&nvmet_tcp_queue_mutex);
}

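/*
 * ->install_queue: for the admin queue (qid 0), first let any in-flight
 * controller teardown complete, then allocate the command array, sized
 * at twice the negotiated submission queue depth.
 */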
static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
{
	struct nvmet_tcp_queue *queue =
		container_of(sq, struct nvmet_tcp_queue, nvme_sq);

	if (sq->qid == 0) {
		/* Let inflight controller teardown complete */
		flush_scheduled_work();
	}

	queue->nr_cmds = sq->size * 2;
	if (nvmet_tcp_alloc_cmds(queue))
		return NVME_SC_INTERNAL;
	return 0;
}

static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
		struct nvmet_port *nport, char *traddr)
{
	struct nvmet_tcp_port *port = nport->priv;

	if (inet_addr_is_any((struct sockaddr *)&port->addr)) {
		struct nvmet_tcp_cmd *cmd =
			container_of(req, struct nvmet_tcp_cmd, req);
		struct nvmet_tcp_queue *queue = cmd->queue;

		sprintf(traddr, "%pISc", (struct sockaddr *)&queue->sockaddr);
	} else {
		memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
	}
}

static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
	.owner		= THIS_MODULE,
	.type		= NVMF_TRTYPE_TCP,
	.msdbd		= 1,
	.add_port	= nvmet_tcp_add_port,
	.remove_port	= nvmet_tcp_remove_port,
	.queue_response	= nvmet_tcp_queue_response,
	.delete_ctrl	= nvmet_tcp_delete_ctrl,
	.install_queue	= nvmet_tcp_install_queue,
	.disc_traddr	= nvmet_tcp_disc_port_addr,
};

static int __init nvmet_tcp_init(void)
{
	int ret;

	nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq",
				WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!nvmet_tcp_wq)
		return -ENOMEM;

	ret = nvmet_register_transport(&nvmet_tcp_ops);
	if (ret)
		goto err;

	return 0;
err:
	destroy_workqueue(nvmet_tcp_wq);
	return ret;
}

static void __exit nvmet_tcp_exit(void)
{
	struct nvmet_tcp_queue *queue;

	nvmet_unregister_transport(&nvmet_tcp_ops);

	flush_scheduled_work();
	mutex_lock(&nvmet_tcp_queue_mutex);
	list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	mutex_unlock(&nvmet_tcp_queue_mutex);
	flush_scheduled_work();

	destroy_workqueue(nvmet_tcp_wq);
}

module_init(nvmet_tcp_init);
module_exit(nvmet_tcp_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */