xref: /OK3568_Linux_fs/kernel/fs/cifs/smbdirect.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  *   Copyright (C) 2017, Microsoft Corporation.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  *   Author(s): Long Li <longli@microsoft.com>
6*4882a593Smuzhiyun  */
7*4882a593Smuzhiyun #include <linux/module.h>
8*4882a593Smuzhiyun #include <linux/highmem.h>
9*4882a593Smuzhiyun #include "smbdirect.h"
10*4882a593Smuzhiyun #include "cifs_debug.h"
11*4882a593Smuzhiyun #include "cifsproto.h"
12*4882a593Smuzhiyun #include "smb2proto.h"
13*4882a593Smuzhiyun 
14*4882a593Smuzhiyun static struct smbd_response *get_empty_queue_buffer(
15*4882a593Smuzhiyun 		struct smbd_connection *info);
16*4882a593Smuzhiyun static struct smbd_response *get_receive_buffer(
17*4882a593Smuzhiyun 		struct smbd_connection *info);
18*4882a593Smuzhiyun static void put_receive_buffer(
19*4882a593Smuzhiyun 		struct smbd_connection *info,
20*4882a593Smuzhiyun 		struct smbd_response *response);
21*4882a593Smuzhiyun static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
22*4882a593Smuzhiyun static void destroy_receive_buffers(struct smbd_connection *info);
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun static void put_empty_packet(
25*4882a593Smuzhiyun 		struct smbd_connection *info, struct smbd_response *response);
26*4882a593Smuzhiyun static void enqueue_reassembly(
27*4882a593Smuzhiyun 		struct smbd_connection *info,
28*4882a593Smuzhiyun 		struct smbd_response *response, int data_length);
29*4882a593Smuzhiyun static struct smbd_response *_get_first_reassembly(
30*4882a593Smuzhiyun 		struct smbd_connection *info);
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun static int smbd_post_recv(
33*4882a593Smuzhiyun 		struct smbd_connection *info,
34*4882a593Smuzhiyun 		struct smbd_response *response);
35*4882a593Smuzhiyun 
36*4882a593Smuzhiyun static int smbd_post_send_empty(struct smbd_connection *info);
37*4882a593Smuzhiyun static int smbd_post_send_data(
38*4882a593Smuzhiyun 		struct smbd_connection *info,
39*4882a593Smuzhiyun 		struct kvec *iov, int n_vec, int remaining_data_length);
40*4882a593Smuzhiyun static int smbd_post_send_page(struct smbd_connection *info,
41*4882a593Smuzhiyun 		struct page *page, unsigned long offset,
42*4882a593Smuzhiyun 		size_t size, int remaining_data_length);
43*4882a593Smuzhiyun 
44*4882a593Smuzhiyun static void destroy_mr_list(struct smbd_connection *info);
45*4882a593Smuzhiyun static int allocate_mr_list(struct smbd_connection *info);
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun /* SMBD version number */
48*4882a593Smuzhiyun #define SMBD_V1	0x0100
49*4882a593Smuzhiyun 
50*4882a593Smuzhiyun /* Port numbers for SMBD transport */
51*4882a593Smuzhiyun #define SMB_PORT	445
52*4882a593Smuzhiyun #define SMBD_PORT	5445
53*4882a593Smuzhiyun 
54*4882a593Smuzhiyun /* Address lookup and resolve timeout in ms */
55*4882a593Smuzhiyun #define RDMA_RESOLVE_TIMEOUT	5000
56*4882a593Smuzhiyun 
57*4882a593Smuzhiyun /* SMBD negotiation timeout in seconds */
58*4882a593Smuzhiyun #define SMBD_NEGOTIATE_TIMEOUT	120
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun /* SMBD minimum receive size and fragmented size as defined in [MS-SMBD] */
61*4882a593Smuzhiyun #define SMBD_MIN_RECEIVE_SIZE		128
62*4882a593Smuzhiyun #define SMBD_MIN_FRAGMENTED_SIZE	131072
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun /*
65*4882a593Smuzhiyun  * Default maximum number of outstanding RDMA reads/writes on this connection.
66*4882a593Smuzhiyun  * This value may be decreased during QP creation, subject to hardware limits.
67*4882a593Smuzhiyun  */
68*4882a593Smuzhiyun #define SMBD_CM_RESPONDER_RESOURCES	32
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun /* Maximum number of retries on data transfer operations */
71*4882a593Smuzhiyun #define SMBD_CM_RETRY			6
72*4882a593Smuzhiyun /* No need to retry on Receiver Not Ready since SMBD manages credits */
73*4882a593Smuzhiyun #define SMBD_CM_RNR_RETRY		0
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun /*
76*4882a593Smuzhiyun  * User configurable initial values per SMBD transport connection
77*4882a593Smuzhiyun  * as defined in [MS-SMBD] 3.1.1.1
78*4882a593Smuzhiyun  * Those may change after a SMBD negotiation
79*4882a593Smuzhiyun  */
80*4882a593Smuzhiyun /* The local peer's maximum number of credits to grant to the peer */
81*4882a593Smuzhiyun int smbd_receive_credit_max = 255;
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun /* The number of send credits to request from the remote peer */
84*4882a593Smuzhiyun int smbd_send_credit_target = 255;
85*4882a593Smuzhiyun 
86*4882a593Smuzhiyun /* The maximum single message size that can be sent to the remote peer */
87*4882a593Smuzhiyun int smbd_max_send_size = 1364;
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun /*  The maximum fragmented upper-layer payload receive size supported */
90*4882a593Smuzhiyun int smbd_max_fragmented_recv_size = 1024 * 1024;
91*4882a593Smuzhiyun 
92*4882a593Smuzhiyun /*  The maximum single-message size which can be received */
93*4882a593Smuzhiyun int smbd_max_receive_size = 8192;
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun /* The timeout to initiate send of a keepalive message on idle */
96*4882a593Smuzhiyun int smbd_keep_alive_interval = 120;
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun /*
99*4882a593Smuzhiyun  * User configurable initial values for RDMA transport
100*4882a593Smuzhiyun  * The actual values used may be lower and are limited to hardware capabilities
101*4882a593Smuzhiyun  */
102*4882a593Smuzhiyun /* Default maximum number of SGEs in a RDMA write/read */
103*4882a593Smuzhiyun int smbd_max_frmr_depth = 2048;
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun /* If the payload is smaller than this many bytes, use RDMA send/recv instead of read/write */
106*4882a593Smuzhiyun int rdma_readwrite_threshold = 4096;
107*4882a593Smuzhiyun 
108*4882a593Smuzhiyun /* Transport logging functions
109*4882a593Smuzhiyun  * Logging is defined as classes. They can be OR'ed to define the actual
110*4882a593Smuzhiyun  * logging level via module parameter smbd_logging_class
111*4882a593Smuzhiyun  * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
112*4882a593Smuzhiyun  * log_rdma_event()
113*4882a593Smuzhiyun  */
114*4882a593Smuzhiyun #define LOG_OUTGOING			0x1
115*4882a593Smuzhiyun #define LOG_INCOMING			0x2
116*4882a593Smuzhiyun #define LOG_READ			0x4
117*4882a593Smuzhiyun #define LOG_WRITE			0x8
118*4882a593Smuzhiyun #define LOG_RDMA_SEND			0x10
119*4882a593Smuzhiyun #define LOG_RDMA_RECV			0x20
120*4882a593Smuzhiyun #define LOG_KEEP_ALIVE			0x40
121*4882a593Smuzhiyun #define LOG_RDMA_EVENT			0x80
122*4882a593Smuzhiyun #define LOG_RDMA_MR			0x100
123*4882a593Smuzhiyun static unsigned int smbd_logging_class;
124*4882a593Smuzhiyun module_param(smbd_logging_class, uint, 0644);
125*4882a593Smuzhiyun MODULE_PARM_DESC(smbd_logging_class,
126*4882a593Smuzhiyun 	"Logging class for SMBD transport 0x0 to 0x100");
127*4882a593Smuzhiyun 
128*4882a593Smuzhiyun #define ERR		0x0
129*4882a593Smuzhiyun #define INFO		0x1
130*4882a593Smuzhiyun static unsigned int smbd_logging_level = ERR;
131*4882a593Smuzhiyun module_param(smbd_logging_level, uint, 0644);
132*4882a593Smuzhiyun MODULE_PARM_DESC(smbd_logging_level,
133*4882a593Smuzhiyun 	"Logging level for SMBD transport, 0 (default): error, 1: info");
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun #define log_rdma(level, class, fmt, args...)				\
136*4882a593Smuzhiyun do {									\
137*4882a593Smuzhiyun 	if (level <= smbd_logging_level || class & smbd_logging_class)	\
138*4882a593Smuzhiyun 		cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
139*4882a593Smuzhiyun } while (0)
140*4882a593Smuzhiyun 
141*4882a593Smuzhiyun #define log_outgoing(level, fmt, args...) \
142*4882a593Smuzhiyun 		log_rdma(level, LOG_OUTGOING, fmt, ##args)
143*4882a593Smuzhiyun #define log_incoming(level, fmt, args...) \
144*4882a593Smuzhiyun 		log_rdma(level, LOG_INCOMING, fmt, ##args)
145*4882a593Smuzhiyun #define log_read(level, fmt, args...)	log_rdma(level, LOG_READ, fmt, ##args)
146*4882a593Smuzhiyun #define log_write(level, fmt, args...)	log_rdma(level, LOG_WRITE, fmt, ##args)
147*4882a593Smuzhiyun #define log_rdma_send(level, fmt, args...) \
148*4882a593Smuzhiyun 		log_rdma(level, LOG_RDMA_SEND, fmt, ##args)
149*4882a593Smuzhiyun #define log_rdma_recv(level, fmt, args...) \
150*4882a593Smuzhiyun 		log_rdma(level, LOG_RDMA_RECV, fmt, ##args)
151*4882a593Smuzhiyun #define log_keep_alive(level, fmt, args...) \
152*4882a593Smuzhiyun 		log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args)
153*4882a593Smuzhiyun #define log_rdma_event(level, fmt, args...) \
154*4882a593Smuzhiyun 		log_rdma(level, LOG_RDMA_EVENT, fmt, ##args)
155*4882a593Smuzhiyun #define log_rdma_mr(level, fmt, args...) \
156*4882a593Smuzhiyun 		log_rdma(level, LOG_RDMA_MR, fmt, ##args)
157*4882a593Smuzhiyun 
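/*
 * Work handler that moves a connected transport to SMBD_DISCONNECTING and
 * calls rdma_disconnect(). Disconnect is deferred to a work item so that
 * completion handlers, which may run in softirq context, never have to call
 * rdma_disconnect() directly.
 */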
158*4882a593Smuzhiyun static void smbd_disconnect_rdma_work(struct work_struct *work)
159*4882a593Smuzhiyun {
160*4882a593Smuzhiyun 	struct smbd_connection *info =
161*4882a593Smuzhiyun 		container_of(work, struct smbd_connection, disconnect_work);
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun 	if (info->transport_status == SMBD_CONNECTED) {
164*4882a593Smuzhiyun 		info->transport_status = SMBD_DISCONNECTING;
165*4882a593Smuzhiyun 		rdma_disconnect(info->id);
166*4882a593Smuzhiyun 	}
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun 
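/* Schedule the disconnect work; safe to call from completion handlers */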
169*4882a593Smuzhiyun static void smbd_disconnect_rdma_connection(struct smbd_connection *info)
170*4882a593Smuzhiyun {
171*4882a593Smuzhiyun 	queue_work(info->workqueue, &info->disconnect_work);
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun 
174*4882a593Smuzhiyun /* Upcall from RDMA CM */
175*4882a593Smuzhiyun static int smbd_conn_upcall(
176*4882a593Smuzhiyun 		struct rdma_cm_id *id, struct rdma_cm_event *event)
177*4882a593Smuzhiyun {
178*4882a593Smuzhiyun 	struct smbd_connection *info = id->context;
179*4882a593Smuzhiyun 
180*4882a593Smuzhiyun 	log_rdma_event(INFO, "event=%d status=%d\n",
181*4882a593Smuzhiyun 		event->event, event->status);
182*4882a593Smuzhiyun 
183*4882a593Smuzhiyun 	switch (event->event) {
184*4882a593Smuzhiyun 	case RDMA_CM_EVENT_ADDR_RESOLVED:
185*4882a593Smuzhiyun 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
186*4882a593Smuzhiyun 		info->ri_rc = 0;
187*4882a593Smuzhiyun 		complete(&info->ri_done);
188*4882a593Smuzhiyun 		break;
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	case RDMA_CM_EVENT_ADDR_ERROR:
191*4882a593Smuzhiyun 		info->ri_rc = -EHOSTUNREACH;
192*4882a593Smuzhiyun 		complete(&info->ri_done);
193*4882a593Smuzhiyun 		break;
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	case RDMA_CM_EVENT_ROUTE_ERROR:
196*4882a593Smuzhiyun 		info->ri_rc = -ENETUNREACH;
197*4882a593Smuzhiyun 		complete(&info->ri_done);
198*4882a593Smuzhiyun 		break;
199*4882a593Smuzhiyun 
200*4882a593Smuzhiyun 	case RDMA_CM_EVENT_ESTABLISHED:
201*4882a593Smuzhiyun 		log_rdma_event(INFO, "connected event=%d\n", event->event);
202*4882a593Smuzhiyun 		info->transport_status = SMBD_CONNECTED;
203*4882a593Smuzhiyun 		wake_up_interruptible(&info->conn_wait);
204*4882a593Smuzhiyun 		break;
205*4882a593Smuzhiyun 
206*4882a593Smuzhiyun 	case RDMA_CM_EVENT_CONNECT_ERROR:
207*4882a593Smuzhiyun 	case RDMA_CM_EVENT_UNREACHABLE:
208*4882a593Smuzhiyun 	case RDMA_CM_EVENT_REJECTED:
209*4882a593Smuzhiyun 		log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
210*4882a593Smuzhiyun 		info->transport_status = SMBD_DISCONNECTED;
211*4882a593Smuzhiyun 		wake_up_interruptible(&info->conn_wait);
212*4882a593Smuzhiyun 		break;
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
215*4882a593Smuzhiyun 	case RDMA_CM_EVENT_DISCONNECTED:
216*4882a593Smuzhiyun 		/* This happens when we fail the negotiation */
217*4882a593Smuzhiyun 		if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
218*4882a593Smuzhiyun 			info->transport_status = SMBD_DISCONNECTED;
219*4882a593Smuzhiyun 			wake_up(&info->conn_wait);
220*4882a593Smuzhiyun 			break;
221*4882a593Smuzhiyun 		}
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun 		info->transport_status = SMBD_DISCONNECTED;
224*4882a593Smuzhiyun 		wake_up_interruptible(&info->disconn_wait);
225*4882a593Smuzhiyun 		wake_up_interruptible(&info->wait_reassembly_queue);
226*4882a593Smuzhiyun 		wake_up_interruptible_all(&info->wait_send_queue);
227*4882a593Smuzhiyun 		break;
228*4882a593Smuzhiyun 
229*4882a593Smuzhiyun 	default:
230*4882a593Smuzhiyun 		break;
231*4882a593Smuzhiyun 	}
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 	return 0;
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun 
236*4882a593Smuzhiyun /* Upcall from RDMA QP */
237*4882a593Smuzhiyun static void
238*4882a593Smuzhiyun smbd_qp_async_error_upcall(struct ib_event *event, void *context)
239*4882a593Smuzhiyun {
240*4882a593Smuzhiyun 	struct smbd_connection *info = context;
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 	log_rdma_event(ERR, "%s on device %s info %p\n",
243*4882a593Smuzhiyun 		ib_event_msg(event->event), event->device->name, info);
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun 	switch (event->event) {
246*4882a593Smuzhiyun 	case IB_EVENT_CQ_ERR:
247*4882a593Smuzhiyun 	case IB_EVENT_QP_FATAL:
248*4882a593Smuzhiyun 		smbd_disconnect_rdma_connection(info);
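		/* fall through to default */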
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	default:
251*4882a593Smuzhiyun 		break;
252*4882a593Smuzhiyun 	}
253*4882a593Smuzhiyun }
254*4882a593Smuzhiyun 
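/* Accessors for the raw SMBD wire packet carried by a request/response */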
255*4882a593Smuzhiyun static inline void *smbd_request_payload(struct smbd_request *request)
256*4882a593Smuzhiyun {
257*4882a593Smuzhiyun 	return (void *)request->packet;
258*4882a593Smuzhiyun }
259*4882a593Smuzhiyun 
260*4882a593Smuzhiyun static inline void *smbd_response_payload(struct smbd_response *response)
261*4882a593Smuzhiyun {
262*4882a593Smuzhiyun 	return (void *)response->packet;
263*4882a593Smuzhiyun }
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun /* Called when a RDMA send is done */
266*4882a593Smuzhiyun static void send_done(struct ib_cq *cq, struct ib_wc *wc)
267*4882a593Smuzhiyun {
268*4882a593Smuzhiyun 	int i;
269*4882a593Smuzhiyun 	struct smbd_request *request =
270*4882a593Smuzhiyun 		container_of(wc->wr_cqe, struct smbd_request, cqe);
271*4882a593Smuzhiyun 
272*4882a593Smuzhiyun 	log_rdma_send(INFO, "smbd_request %p completed wc->status=%d\n",
273*4882a593Smuzhiyun 		request, wc->status);
274*4882a593Smuzhiyun 
275*4882a593Smuzhiyun 	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
276*4882a593Smuzhiyun 		log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
277*4882a593Smuzhiyun 			wc->status, wc->opcode);
278*4882a593Smuzhiyun 		smbd_disconnect_rdma_connection(request->info);
279*4882a593Smuzhiyun 	}
280*4882a593Smuzhiyun 
281*4882a593Smuzhiyun 	for (i = 0; i < request->num_sge; i++)
282*4882a593Smuzhiyun 		ib_dma_unmap_single(request->info->id->device,
283*4882a593Smuzhiyun 			request->sge[i].addr,
284*4882a593Smuzhiyun 			request->sge[i].length,
285*4882a593Smuzhiyun 			DMA_TO_DEVICE);
286*4882a593Smuzhiyun 
287*4882a593Smuzhiyun 	if (atomic_dec_and_test(&request->info->send_pending))
288*4882a593Smuzhiyun 		wake_up(&request->info->wait_send_pending);
289*4882a593Smuzhiyun 
290*4882a593Smuzhiyun 	wake_up(&request->info->wait_post_send);
291*4882a593Smuzhiyun 
292*4882a593Smuzhiyun 	mempool_free(request, request->info->request_mempool);
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun 
295*4882a593Smuzhiyun static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
296*4882a593Smuzhiyun {
297*4882a593Smuzhiyun 	log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n",
298*4882a593Smuzhiyun 		       resp->min_version, resp->max_version,
299*4882a593Smuzhiyun 		       resp->negotiated_version, resp->credits_requested,
300*4882a593Smuzhiyun 		       resp->credits_granted, resp->status,
301*4882a593Smuzhiyun 		       resp->max_readwrite_size, resp->preferred_send_size,
302*4882a593Smuzhiyun 		       resp->max_receive_size, resp->max_fragmented_size);
303*4882a593Smuzhiyun }
304*4882a593Smuzhiyun 
305*4882a593Smuzhiyun /*
306*4882a593Smuzhiyun  * Process a negotiation response message, according to [MS-SMBD]3.1.5.7
307*4882a593Smuzhiyun  * response, packet_length: the negotiation response message
308*4882a593Smuzhiyun  * return value: true if negotiation is a success, false if failed
309*4882a593Smuzhiyun  */
310*4882a593Smuzhiyun static bool process_negotiation_response(
311*4882a593Smuzhiyun 		struct smbd_response *response, int packet_length)
312*4882a593Smuzhiyun {
313*4882a593Smuzhiyun 	struct smbd_connection *info = response->info;
314*4882a593Smuzhiyun 	struct smbd_negotiate_resp *packet = smbd_response_payload(response);
315*4882a593Smuzhiyun 
316*4882a593Smuzhiyun 	if (packet_length < sizeof(struct smbd_negotiate_resp)) {
317*4882a593Smuzhiyun 		log_rdma_event(ERR,
318*4882a593Smuzhiyun 			"error: packet_length=%d\n", packet_length);
319*4882a593Smuzhiyun 		return false;
320*4882a593Smuzhiyun 	}
321*4882a593Smuzhiyun 
322*4882a593Smuzhiyun 	if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) {
323*4882a593Smuzhiyun 		log_rdma_event(ERR, "error: negotiated_version=%x\n",
324*4882a593Smuzhiyun 			le16_to_cpu(packet->negotiated_version));
325*4882a593Smuzhiyun 		return false;
326*4882a593Smuzhiyun 	}
327*4882a593Smuzhiyun 	info->protocol = le16_to_cpu(packet->negotiated_version);
328*4882a593Smuzhiyun 
329*4882a593Smuzhiyun 	if (packet->credits_requested == 0) {
330*4882a593Smuzhiyun 		log_rdma_event(ERR, "error: credits_requested==0\n");
331*4882a593Smuzhiyun 		return false;
332*4882a593Smuzhiyun 	}
333*4882a593Smuzhiyun 	info->receive_credit_target = le16_to_cpu(packet->credits_requested);
334*4882a593Smuzhiyun 
335*4882a593Smuzhiyun 	if (packet->credits_granted == 0) {
336*4882a593Smuzhiyun 		log_rdma_event(ERR, "error: credits_granted==0\n");
337*4882a593Smuzhiyun 		return false;
338*4882a593Smuzhiyun 	}
339*4882a593Smuzhiyun 	atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted));
340*4882a593Smuzhiyun 
341*4882a593Smuzhiyun 	atomic_set(&info->receive_credits, 0);
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 	if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) {
344*4882a593Smuzhiyun 		log_rdma_event(ERR, "error: preferred_send_size=%d\n",
345*4882a593Smuzhiyun 			le32_to_cpu(packet->preferred_send_size));
346*4882a593Smuzhiyun 		return false;
347*4882a593Smuzhiyun 	}
348*4882a593Smuzhiyun 	info->max_receive_size = le32_to_cpu(packet->preferred_send_size);
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun 	if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) {
351*4882a593Smuzhiyun 		log_rdma_event(ERR, "error: max_receive_size=%d\n",
352*4882a593Smuzhiyun 			le32_to_cpu(packet->max_receive_size));
353*4882a593Smuzhiyun 		return false;
354*4882a593Smuzhiyun 	}
355*4882a593Smuzhiyun 	info->max_send_size = min_t(int, info->max_send_size,
356*4882a593Smuzhiyun 					le32_to_cpu(packet->max_receive_size));
357*4882a593Smuzhiyun 
358*4882a593Smuzhiyun 	if (le32_to_cpu(packet->max_fragmented_size) <
359*4882a593Smuzhiyun 			SMBD_MIN_FRAGMENTED_SIZE) {
360*4882a593Smuzhiyun 		log_rdma_event(ERR, "error: max_fragmented_size=%d\n",
361*4882a593Smuzhiyun 			le32_to_cpu(packet->max_fragmented_size));
362*4882a593Smuzhiyun 		return false;
363*4882a593Smuzhiyun 	}
364*4882a593Smuzhiyun 	info->max_fragmented_send_size =
365*4882a593Smuzhiyun 		le32_to_cpu(packet->max_fragmented_size);
366*4882a593Smuzhiyun 	info->rdma_readwrite_threshold =
367*4882a593Smuzhiyun 		rdma_readwrite_threshold > info->max_fragmented_send_size ?
368*4882a593Smuzhiyun 		info->max_fragmented_send_size :
369*4882a593Smuzhiyun 		rdma_readwrite_threshold;
370*4882a593Smuzhiyun 
371*4882a593Smuzhiyun 
372*4882a593Smuzhiyun 	info->max_readwrite_size = min_t(u32,
373*4882a593Smuzhiyun 			le32_to_cpu(packet->max_readwrite_size),
374*4882a593Smuzhiyun 			info->max_frmr_depth * PAGE_SIZE);
375*4882a593Smuzhiyun 	info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE;
376*4882a593Smuzhiyun 
377*4882a593Smuzhiyun 	return true;
378*4882a593Smuzhiyun }
379*4882a593Smuzhiyun 
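/*
 * Work handler that refills posted receives until the receive credit target
 * is reached, drawing first from the receive buffer pool and then from the
 * empty packet queue. The newly offered credits are accumulated in
 * new_credits_offered, and an empty message is sent promptly when the peer
 * is low on credits or a keep-alive response is pending.
 */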
380*4882a593Smuzhiyun static void smbd_post_send_credits(struct work_struct *work)
381*4882a593Smuzhiyun {
382*4882a593Smuzhiyun 	int ret = 0;
383*4882a593Smuzhiyun 	int use_receive_queue = 1;
384*4882a593Smuzhiyun 	int rc;
385*4882a593Smuzhiyun 	struct smbd_response *response;
386*4882a593Smuzhiyun 	struct smbd_connection *info =
387*4882a593Smuzhiyun 		container_of(work, struct smbd_connection,
388*4882a593Smuzhiyun 			post_send_credits_work);
389*4882a593Smuzhiyun 
390*4882a593Smuzhiyun 	if (info->transport_status != SMBD_CONNECTED) {
391*4882a593Smuzhiyun 		wake_up(&info->wait_receive_queues);
392*4882a593Smuzhiyun 		return;
393*4882a593Smuzhiyun 	}
394*4882a593Smuzhiyun 
395*4882a593Smuzhiyun 	if (info->receive_credit_target >
396*4882a593Smuzhiyun 		atomic_read(&info->receive_credits)) {
397*4882a593Smuzhiyun 		while (true) {
398*4882a593Smuzhiyun 			if (use_receive_queue)
399*4882a593Smuzhiyun 				response = get_receive_buffer(info);
400*4882a593Smuzhiyun 			else
401*4882a593Smuzhiyun 				response = get_empty_queue_buffer(info);
402*4882a593Smuzhiyun 			if (!response) {
403*4882a593Smuzhiyun 				/* now switch to empty packet queue */
404*4882a593Smuzhiyun 				if (use_receive_queue) {
405*4882a593Smuzhiyun 					use_receive_queue = 0;
406*4882a593Smuzhiyun 					continue;
407*4882a593Smuzhiyun 				} else
408*4882a593Smuzhiyun 					break;
409*4882a593Smuzhiyun 			}
410*4882a593Smuzhiyun 
411*4882a593Smuzhiyun 			response->type = SMBD_TRANSFER_DATA;
412*4882a593Smuzhiyun 			response->first_segment = false;
413*4882a593Smuzhiyun 			rc = smbd_post_recv(info, response);
414*4882a593Smuzhiyun 			if (rc) {
415*4882a593Smuzhiyun 				log_rdma_recv(ERR,
416*4882a593Smuzhiyun 					"post_recv failed rc=%d\n", rc);
417*4882a593Smuzhiyun 				put_receive_buffer(info, response);
418*4882a593Smuzhiyun 				break;
419*4882a593Smuzhiyun 			}
420*4882a593Smuzhiyun 
421*4882a593Smuzhiyun 			ret++;
422*4882a593Smuzhiyun 		}
423*4882a593Smuzhiyun 	}
424*4882a593Smuzhiyun 
425*4882a593Smuzhiyun 	spin_lock(&info->lock_new_credits_offered);
426*4882a593Smuzhiyun 	info->new_credits_offered += ret;
427*4882a593Smuzhiyun 	spin_unlock(&info->lock_new_credits_offered);
428*4882a593Smuzhiyun 
429*4882a593Smuzhiyun 	/* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */
430*4882a593Smuzhiyun 	info->send_immediate = true;
431*4882a593Smuzhiyun 	if (atomic_read(&info->receive_credits) <
432*4882a593Smuzhiyun 		info->receive_credit_target - 1) {
433*4882a593Smuzhiyun 		if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
434*4882a593Smuzhiyun 		    info->send_immediate) {
435*4882a593Smuzhiyun 			log_keep_alive(INFO, "send an empty message\n");
436*4882a593Smuzhiyun 			smbd_post_send_empty(info);
437*4882a593Smuzhiyun 		}
438*4882a593Smuzhiyun 	}
439*4882a593Smuzhiyun }
440*4882a593Smuzhiyun 
441*4882a593Smuzhiyun /* Called from softirq, when recv is done */
442*4882a593Smuzhiyun static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
443*4882a593Smuzhiyun {
444*4882a593Smuzhiyun 	struct smbd_data_transfer *data_transfer;
445*4882a593Smuzhiyun 	struct smbd_response *response =
446*4882a593Smuzhiyun 		container_of(wc->wr_cqe, struct smbd_response, cqe);
447*4882a593Smuzhiyun 	struct smbd_connection *info = response->info;
448*4882a593Smuzhiyun 	int data_length = 0;
449*4882a593Smuzhiyun 
450*4882a593Smuzhiyun 	log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%x\n",
451*4882a593Smuzhiyun 		      response, response->type, wc->status, wc->opcode,
452*4882a593Smuzhiyun 		      wc->byte_len, wc->pkey_index);
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun 	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
455*4882a593Smuzhiyun 		log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
456*4882a593Smuzhiyun 			wc->status, wc->opcode);
457*4882a593Smuzhiyun 		smbd_disconnect_rdma_connection(info);
458*4882a593Smuzhiyun 		goto error;
459*4882a593Smuzhiyun 	}
460*4882a593Smuzhiyun 
461*4882a593Smuzhiyun 	ib_dma_sync_single_for_cpu(
462*4882a593Smuzhiyun 		wc->qp->device,
463*4882a593Smuzhiyun 		response->sge.addr,
464*4882a593Smuzhiyun 		response->sge.length,
465*4882a593Smuzhiyun 		DMA_FROM_DEVICE);
466*4882a593Smuzhiyun 
467*4882a593Smuzhiyun 	switch (response->type) {
468*4882a593Smuzhiyun 	/* SMBD negotiation response */
469*4882a593Smuzhiyun 	case SMBD_NEGOTIATE_RESP:
470*4882a593Smuzhiyun 		dump_smbd_negotiate_resp(smbd_response_payload(response));
471*4882a593Smuzhiyun 		info->full_packet_received = true;
472*4882a593Smuzhiyun 		info->negotiate_done =
473*4882a593Smuzhiyun 			process_negotiation_response(response, wc->byte_len);
474*4882a593Smuzhiyun 		complete(&info->negotiate_completion);
475*4882a593Smuzhiyun 		break;
476*4882a593Smuzhiyun 
477*4882a593Smuzhiyun 	/* SMBD data transfer packet */
478*4882a593Smuzhiyun 	case SMBD_TRANSFER_DATA:
479*4882a593Smuzhiyun 		data_transfer = smbd_response_payload(response);
480*4882a593Smuzhiyun 		data_length = le32_to_cpu(data_transfer->data_length);
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun 		/*
483*4882a593Smuzhiyun 		 * If this is a packet with a data payload, place the data in the
484*4882a593Smuzhiyun 		 * reassembly queue and wake up the reading thread
485*4882a593Smuzhiyun 		 */
486*4882a593Smuzhiyun 		if (data_length) {
487*4882a593Smuzhiyun 			if (info->full_packet_received)
488*4882a593Smuzhiyun 				response->first_segment = true;
489*4882a593Smuzhiyun 
490*4882a593Smuzhiyun 			if (le32_to_cpu(data_transfer->remaining_data_length))
491*4882a593Smuzhiyun 				info->full_packet_received = false;
492*4882a593Smuzhiyun 			else
493*4882a593Smuzhiyun 				info->full_packet_received = true;
494*4882a593Smuzhiyun 
495*4882a593Smuzhiyun 			enqueue_reassembly(
496*4882a593Smuzhiyun 				info,
497*4882a593Smuzhiyun 				response,
498*4882a593Smuzhiyun 				data_length);
499*4882a593Smuzhiyun 		} else
500*4882a593Smuzhiyun 			put_empty_packet(info, response);
501*4882a593Smuzhiyun 
502*4882a593Smuzhiyun 		if (data_length)
503*4882a593Smuzhiyun 			wake_up_interruptible(&info->wait_reassembly_queue);
504*4882a593Smuzhiyun 
505*4882a593Smuzhiyun 		atomic_dec(&info->receive_credits);
506*4882a593Smuzhiyun 		info->receive_credit_target =
507*4882a593Smuzhiyun 			le16_to_cpu(data_transfer->credits_requested);
508*4882a593Smuzhiyun 		if (le16_to_cpu(data_transfer->credits_granted)) {
509*4882a593Smuzhiyun 			atomic_add(le16_to_cpu(data_transfer->credits_granted),
510*4882a593Smuzhiyun 				&info->send_credits);
511*4882a593Smuzhiyun 			/*
512*4882a593Smuzhiyun 			 * We have new send credits granted from remote peer
513*4882a593Smuzhiyun 			 * If any sender is waiting for credits, unblock it
514*4882a593Smuzhiyun 			 */
515*4882a593Smuzhiyun 			wake_up_interruptible(&info->wait_send_queue);
516*4882a593Smuzhiyun 		}
517*4882a593Smuzhiyun 
518*4882a593Smuzhiyun 		log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n",
519*4882a593Smuzhiyun 			     le16_to_cpu(data_transfer->flags),
520*4882a593Smuzhiyun 			     le32_to_cpu(data_transfer->data_offset),
521*4882a593Smuzhiyun 			     le32_to_cpu(data_transfer->data_length),
522*4882a593Smuzhiyun 			     le32_to_cpu(data_transfer->remaining_data_length));
523*4882a593Smuzhiyun 
524*4882a593Smuzhiyun 		/* Send a KEEP_ALIVE response right away if requested */
525*4882a593Smuzhiyun 		info->keep_alive_requested = KEEP_ALIVE_NONE;
526*4882a593Smuzhiyun 		if (le16_to_cpu(data_transfer->flags) &
527*4882a593Smuzhiyun 				SMB_DIRECT_RESPONSE_REQUESTED) {
528*4882a593Smuzhiyun 			info->keep_alive_requested = KEEP_ALIVE_PENDING;
529*4882a593Smuzhiyun 		}
530*4882a593Smuzhiyun 
531*4882a593Smuzhiyun 		return;
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun 	default:
534*4882a593Smuzhiyun 		log_rdma_recv(ERR,
535*4882a593Smuzhiyun 			"unexpected response type=%d\n", response->type);
536*4882a593Smuzhiyun 	}
537*4882a593Smuzhiyun 
538*4882a593Smuzhiyun error:
539*4882a593Smuzhiyun 	put_receive_buffer(info, response);
540*4882a593Smuzhiyun }
541*4882a593Smuzhiyun 
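/*
 * Create an RDMA CM ID for the server address and synchronously resolve its
 * address and route, each step bounded by RDMA_RESOLVE_TIMEOUT.
 */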
542*4882a593Smuzhiyun static struct rdma_cm_id *smbd_create_id(
543*4882a593Smuzhiyun 		struct smbd_connection *info,
544*4882a593Smuzhiyun 		struct sockaddr *dstaddr, int port)
545*4882a593Smuzhiyun {
546*4882a593Smuzhiyun 	struct rdma_cm_id *id;
547*4882a593Smuzhiyun 	int rc;
548*4882a593Smuzhiyun 	__be16 *sport;
549*4882a593Smuzhiyun 
550*4882a593Smuzhiyun 	id = rdma_create_id(&init_net, smbd_conn_upcall, info,
551*4882a593Smuzhiyun 		RDMA_PS_TCP, IB_QPT_RC);
552*4882a593Smuzhiyun 	if (IS_ERR(id)) {
553*4882a593Smuzhiyun 		rc = PTR_ERR(id);
554*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc);
555*4882a593Smuzhiyun 		return id;
556*4882a593Smuzhiyun 	}
557*4882a593Smuzhiyun 
558*4882a593Smuzhiyun 	if (dstaddr->sa_family == AF_INET6)
559*4882a593Smuzhiyun 		sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
560*4882a593Smuzhiyun 	else
561*4882a593Smuzhiyun 		sport = &((struct sockaddr_in *)dstaddr)->sin_port;
562*4882a593Smuzhiyun 
563*4882a593Smuzhiyun 	*sport = htons(port);
564*4882a593Smuzhiyun 
565*4882a593Smuzhiyun 	init_completion(&info->ri_done);
566*4882a593Smuzhiyun 	info->ri_rc = -ETIMEDOUT;
567*4882a593Smuzhiyun 
568*4882a593Smuzhiyun 	rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr,
569*4882a593Smuzhiyun 		RDMA_RESOLVE_TIMEOUT);
570*4882a593Smuzhiyun 	if (rc) {
571*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
572*4882a593Smuzhiyun 		goto out;
573*4882a593Smuzhiyun 	}
574*4882a593Smuzhiyun 	wait_for_completion_interruptible_timeout(
575*4882a593Smuzhiyun 		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
576*4882a593Smuzhiyun 	rc = info->ri_rc;
577*4882a593Smuzhiyun 	if (rc) {
578*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
579*4882a593Smuzhiyun 		goto out;
580*4882a593Smuzhiyun 	}
581*4882a593Smuzhiyun 
582*4882a593Smuzhiyun 	info->ri_rc = -ETIMEDOUT;
583*4882a593Smuzhiyun 	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
584*4882a593Smuzhiyun 	if (rc) {
585*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
586*4882a593Smuzhiyun 		goto out;
587*4882a593Smuzhiyun 	}
588*4882a593Smuzhiyun 	wait_for_completion_interruptible_timeout(
589*4882a593Smuzhiyun 		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
590*4882a593Smuzhiyun 	rc = info->ri_rc;
591*4882a593Smuzhiyun 	if (rc) {
592*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);
593*4882a593Smuzhiyun 		goto out;
594*4882a593Smuzhiyun 	}
595*4882a593Smuzhiyun 
596*4882a593Smuzhiyun 	return id;
597*4882a593Smuzhiyun 
598*4882a593Smuzhiyun out:
599*4882a593Smuzhiyun 	rdma_destroy_id(id);
600*4882a593Smuzhiyun 	return ERR_PTR(rc);
601*4882a593Smuzhiyun }
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun /*
604*4882a593Smuzhiyun  * Test if FRWR (Fast Registration Work Requests) is supported on the device
605*4882a593Smuzhiyun  * This implementation requires FRWR for RDMA read/write
606*4882a593Smuzhiyun  * return value: true if it is supported
607*4882a593Smuzhiyun  */
608*4882a593Smuzhiyun static bool frwr_is_supported(struct ib_device_attr *attrs)
609*4882a593Smuzhiyun {
610*4882a593Smuzhiyun 	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
611*4882a593Smuzhiyun 		return false;
612*4882a593Smuzhiyun 	if (attrs->max_fast_reg_page_list_len == 0)
613*4882a593Smuzhiyun 		return false;
614*4882a593Smuzhiyun 	return true;
615*4882a593Smuzhiyun }
616*4882a593Smuzhiyun 
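/*
 * Open the RDMA interface adapter for this connection: create the CM ID,
 * verify FRWR support, clamp max_frmr_depth to the device limit, choose the
 * MR type, and allocate a protection domain.
 */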
617*4882a593Smuzhiyun static int smbd_ia_open(
618*4882a593Smuzhiyun 		struct smbd_connection *info,
619*4882a593Smuzhiyun 		struct sockaddr *dstaddr, int port)
620*4882a593Smuzhiyun {
621*4882a593Smuzhiyun 	int rc;
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	info->id = smbd_create_id(info, dstaddr, port);
624*4882a593Smuzhiyun 	if (IS_ERR(info->id)) {
625*4882a593Smuzhiyun 		rc = PTR_ERR(info->id);
626*4882a593Smuzhiyun 		goto out1;
627*4882a593Smuzhiyun 	}
628*4882a593Smuzhiyun 
629*4882a593Smuzhiyun 	if (!frwr_is_supported(&info->id->device->attrs)) {
630*4882a593Smuzhiyun 		log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n");
631*4882a593Smuzhiyun 		log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n",
632*4882a593Smuzhiyun 			       info->id->device->attrs.device_cap_flags,
633*4882a593Smuzhiyun 			       info->id->device->attrs.max_fast_reg_page_list_len);
634*4882a593Smuzhiyun 		rc = -EPROTONOSUPPORT;
635*4882a593Smuzhiyun 		goto out2;
636*4882a593Smuzhiyun 	}
637*4882a593Smuzhiyun 	info->max_frmr_depth = min_t(int,
638*4882a593Smuzhiyun 		smbd_max_frmr_depth,
639*4882a593Smuzhiyun 		info->id->device->attrs.max_fast_reg_page_list_len);
640*4882a593Smuzhiyun 	info->mr_type = IB_MR_TYPE_MEM_REG;
641*4882a593Smuzhiyun 	if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
642*4882a593Smuzhiyun 		info->mr_type = IB_MR_TYPE_SG_GAPS;
643*4882a593Smuzhiyun 
644*4882a593Smuzhiyun 	info->pd = ib_alloc_pd(info->id->device, 0);
645*4882a593Smuzhiyun 	if (IS_ERR(info->pd)) {
646*4882a593Smuzhiyun 		rc = PTR_ERR(info->pd);
647*4882a593Smuzhiyun 		log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc);
648*4882a593Smuzhiyun 		goto out2;
649*4882a593Smuzhiyun 	}
650*4882a593Smuzhiyun 
651*4882a593Smuzhiyun 	return 0;
652*4882a593Smuzhiyun 
653*4882a593Smuzhiyun out2:
654*4882a593Smuzhiyun 	rdma_destroy_id(info->id);
655*4882a593Smuzhiyun 	info->id = NULL;
656*4882a593Smuzhiyun 
657*4882a593Smuzhiyun out1:
658*4882a593Smuzhiyun 	return rc;
659*4882a593Smuzhiyun }
660*4882a593Smuzhiyun 
661*4882a593Smuzhiyun /*
662*4882a593Smuzhiyun  * Send a negotiation request message to the peer
663*4882a593Smuzhiyun  * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3
664*4882a593Smuzhiyun  * After negotiation, the transport is connected and ready for
665*4882a593Smuzhiyun  * carrying upper layer SMB payload
666*4882a593Smuzhiyun  */
667*4882a593Smuzhiyun static int smbd_post_send_negotiate_req(struct smbd_connection *info)
668*4882a593Smuzhiyun {
669*4882a593Smuzhiyun 	struct ib_send_wr send_wr;
670*4882a593Smuzhiyun 	int rc = -ENOMEM;
671*4882a593Smuzhiyun 	struct smbd_request *request;
672*4882a593Smuzhiyun 	struct smbd_negotiate_req *packet;
673*4882a593Smuzhiyun 
674*4882a593Smuzhiyun 	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
675*4882a593Smuzhiyun 	if (!request)
676*4882a593Smuzhiyun 		return rc;
677*4882a593Smuzhiyun 
678*4882a593Smuzhiyun 	request->info = info;
679*4882a593Smuzhiyun 
680*4882a593Smuzhiyun 	packet = smbd_request_payload(request);
681*4882a593Smuzhiyun 	packet->min_version = cpu_to_le16(SMBD_V1);
682*4882a593Smuzhiyun 	packet->max_version = cpu_to_le16(SMBD_V1);
683*4882a593Smuzhiyun 	packet->reserved = 0;
684*4882a593Smuzhiyun 	packet->credits_requested = cpu_to_le16(info->send_credit_target);
685*4882a593Smuzhiyun 	packet->preferred_send_size = cpu_to_le32(info->max_send_size);
686*4882a593Smuzhiyun 	packet->max_receive_size = cpu_to_le32(info->max_receive_size);
687*4882a593Smuzhiyun 	packet->max_fragmented_size =
688*4882a593Smuzhiyun 		cpu_to_le32(info->max_fragmented_recv_size);
689*4882a593Smuzhiyun 
690*4882a593Smuzhiyun 	request->num_sge = 1;
691*4882a593Smuzhiyun 	request->sge[0].addr = ib_dma_map_single(
692*4882a593Smuzhiyun 				info->id->device, (void *)packet,
693*4882a593Smuzhiyun 				sizeof(*packet), DMA_TO_DEVICE);
694*4882a593Smuzhiyun 	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
695*4882a593Smuzhiyun 		rc = -EIO;
696*4882a593Smuzhiyun 		goto dma_mapping_failed;
697*4882a593Smuzhiyun 	}
698*4882a593Smuzhiyun 
699*4882a593Smuzhiyun 	request->sge[0].length = sizeof(*packet);
700*4882a593Smuzhiyun 	request->sge[0].lkey = info->pd->local_dma_lkey;
701*4882a593Smuzhiyun 
702*4882a593Smuzhiyun 	ib_dma_sync_single_for_device(
703*4882a593Smuzhiyun 		info->id->device, request->sge[0].addr,
704*4882a593Smuzhiyun 		request->sge[0].length, DMA_TO_DEVICE);
705*4882a593Smuzhiyun 
706*4882a593Smuzhiyun 	request->cqe.done = send_done;
707*4882a593Smuzhiyun 
708*4882a593Smuzhiyun 	send_wr.next = NULL;
709*4882a593Smuzhiyun 	send_wr.wr_cqe = &request->cqe;
710*4882a593Smuzhiyun 	send_wr.sg_list = request->sge;
711*4882a593Smuzhiyun 	send_wr.num_sge = request->num_sge;
712*4882a593Smuzhiyun 	send_wr.opcode = IB_WR_SEND;
713*4882a593Smuzhiyun 	send_wr.send_flags = IB_SEND_SIGNALED;
714*4882a593Smuzhiyun 
715*4882a593Smuzhiyun 	log_rdma_send(INFO, "sge addr=%llx length=%x lkey=%x\n",
716*4882a593Smuzhiyun 		request->sge[0].addr,
717*4882a593Smuzhiyun 		request->sge[0].length, request->sge[0].lkey);
718*4882a593Smuzhiyun 
719*4882a593Smuzhiyun 	atomic_inc(&info->send_pending);
720*4882a593Smuzhiyun 	rc = ib_post_send(info->id->qp, &send_wr, NULL);
721*4882a593Smuzhiyun 	if (!rc)
722*4882a593Smuzhiyun 		return 0;
723*4882a593Smuzhiyun 
724*4882a593Smuzhiyun 	/* if we reach here, post send failed */
725*4882a593Smuzhiyun 	log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
726*4882a593Smuzhiyun 	atomic_dec(&info->send_pending);
727*4882a593Smuzhiyun 	ib_dma_unmap_single(info->id->device, request->sge[0].addr,
728*4882a593Smuzhiyun 		request->sge[0].length, DMA_TO_DEVICE);
729*4882a593Smuzhiyun 
730*4882a593Smuzhiyun 	smbd_disconnect_rdma_connection(info);
731*4882a593Smuzhiyun 
732*4882a593Smuzhiyun dma_mapping_failed:
733*4882a593Smuzhiyun 	mempool_free(request, info->request_mempool);
734*4882a593Smuzhiyun 	return rc;
735*4882a593Smuzhiyun }
736*4882a593Smuzhiyun 
737*4882a593Smuzhiyun /*
738*4882a593Smuzhiyun  * Extend the credits to remote peer
739*4882a593Smuzhiyun  * This implements [MS-SMBD] 3.1.5.9
740*4882a593Smuzhiyun  * The idea is that we should extend credits to remote peer as quickly as
741*4882a593Smuzhiyun  * it's allowed, to maintain data flow. We allocate as many receive
742*4882a593Smuzhiyun  * buffers as possible, and extend the receive credits to the remote peer.
743*4882a593Smuzhiyun  * return value: the new credits being granted.
744*4882a593Smuzhiyun  */
745*4882a593Smuzhiyun static int manage_credits_prior_sending(struct smbd_connection *info)
746*4882a593Smuzhiyun {
747*4882a593Smuzhiyun 	int new_credits;
748*4882a593Smuzhiyun 
749*4882a593Smuzhiyun 	spin_lock(&info->lock_new_credits_offered);
750*4882a593Smuzhiyun 	new_credits = info->new_credits_offered;
751*4882a593Smuzhiyun 	info->new_credits_offered = 0;
752*4882a593Smuzhiyun 	spin_unlock(&info->lock_new_credits_offered);
753*4882a593Smuzhiyun 
754*4882a593Smuzhiyun 	return new_credits;
755*4882a593Smuzhiyun }
756*4882a593Smuzhiyun 
757*4882a593Smuzhiyun /*
758*4882a593Smuzhiyun  * Check if we need to send a KEEP_ALIVE message
759*4882a593Smuzhiyun  * The idle connection timer triggers a KEEP_ALIVE message when it expires.
760*4882a593Smuzhiyun  * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flags to have the peer
761*4882a593Smuzhiyun  * send back a response.
762*4882a593Smuzhiyun  * return value:
763*4882a593Smuzhiyun  * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
764*4882a593Smuzhiyun  * 0: otherwise
765*4882a593Smuzhiyun  */
766*4882a593Smuzhiyun static int manage_keep_alive_before_sending(struct smbd_connection *info)
767*4882a593Smuzhiyun {
768*4882a593Smuzhiyun 	if (info->keep_alive_requested == KEEP_ALIVE_PENDING) {
769*4882a593Smuzhiyun 		info->keep_alive_requested = KEEP_ALIVE_SENT;
770*4882a593Smuzhiyun 		return 1;
771*4882a593Smuzhiyun 	}
772*4882a593Smuzhiyun 	return 0;
773*4882a593Smuzhiyun }
774*4882a593Smuzhiyun 
775*4882a593Smuzhiyun /* Post the send request */
776*4882a593Smuzhiyun static int smbd_post_send(struct smbd_connection *info,
777*4882a593Smuzhiyun 		struct smbd_request *request)
778*4882a593Smuzhiyun {
779*4882a593Smuzhiyun 	struct ib_send_wr send_wr;
780*4882a593Smuzhiyun 	int rc, i;
781*4882a593Smuzhiyun 
782*4882a593Smuzhiyun 	for (i = 0; i < request->num_sge; i++) {
783*4882a593Smuzhiyun 		log_rdma_send(INFO,
784*4882a593Smuzhiyun 			"rdma_request sge[%d] addr=%llu length=%u\n",
785*4882a593Smuzhiyun 			i, request->sge[i].addr, request->sge[i].length);
786*4882a593Smuzhiyun 		ib_dma_sync_single_for_device(
787*4882a593Smuzhiyun 			info->id->device,
788*4882a593Smuzhiyun 			request->sge[i].addr,
789*4882a593Smuzhiyun 			request->sge[i].length,
790*4882a593Smuzhiyun 			DMA_TO_DEVICE);
791*4882a593Smuzhiyun 	}
792*4882a593Smuzhiyun 
793*4882a593Smuzhiyun 	request->cqe.done = send_done;
794*4882a593Smuzhiyun 
795*4882a593Smuzhiyun 	send_wr.next = NULL;
796*4882a593Smuzhiyun 	send_wr.wr_cqe = &request->cqe;
797*4882a593Smuzhiyun 	send_wr.sg_list = request->sge;
798*4882a593Smuzhiyun 	send_wr.num_sge = request->num_sge;
799*4882a593Smuzhiyun 	send_wr.opcode = IB_WR_SEND;
800*4882a593Smuzhiyun 	send_wr.send_flags = IB_SEND_SIGNALED;
801*4882a593Smuzhiyun 
802*4882a593Smuzhiyun 	rc = ib_post_send(info->id->qp, &send_wr, NULL);
803*4882a593Smuzhiyun 	if (rc) {
804*4882a593Smuzhiyun 		log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
805*4882a593Smuzhiyun 		smbd_disconnect_rdma_connection(info);
806*4882a593Smuzhiyun 		rc = -EAGAIN;
807*4882a593Smuzhiyun 	} else
808*4882a593Smuzhiyun 		/* Reset timer for idle connection after packet is sent */
809*4882a593Smuzhiyun 		mod_delayed_work(info->workqueue, &info->idle_timer_work,
810*4882a593Smuzhiyun 			info->keep_alive_interval*HZ);
811*4882a593Smuzhiyun 
812*4882a593Smuzhiyun 	return rc;
813*4882a593Smuzhiyun }
814*4882a593Smuzhiyun 
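/*
 * Build and post one SMBD data transfer packet from a scatterlist.
 * Waits for a send credit and for room under send_credit_target, fills in
 * the packet header (including the credits granted to the peer), DMA-maps
 * the header and payload SGEs, and posts the send. On failure, the credit
 * and pending-send accounting is rolled back.
 */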
815*4882a593Smuzhiyun static int smbd_post_send_sgl(struct smbd_connection *info,
816*4882a593Smuzhiyun 	struct scatterlist *sgl, int data_length, int remaining_data_length)
817*4882a593Smuzhiyun {
818*4882a593Smuzhiyun 	int num_sgs;
819*4882a593Smuzhiyun 	int i, rc;
820*4882a593Smuzhiyun 	int header_length;
821*4882a593Smuzhiyun 	struct smbd_request *request;
822*4882a593Smuzhiyun 	struct smbd_data_transfer *packet;
823*4882a593Smuzhiyun 	int new_credits;
824*4882a593Smuzhiyun 	struct scatterlist *sg;
825*4882a593Smuzhiyun 
826*4882a593Smuzhiyun wait_credit:
827*4882a593Smuzhiyun 	/* Wait for send credits. A SMBD packet needs one credit */
828*4882a593Smuzhiyun 	rc = wait_event_interruptible(info->wait_send_queue,
829*4882a593Smuzhiyun 		atomic_read(&info->send_credits) > 0 ||
830*4882a593Smuzhiyun 		info->transport_status != SMBD_CONNECTED);
831*4882a593Smuzhiyun 	if (rc)
832*4882a593Smuzhiyun 		goto err_wait_credit;
833*4882a593Smuzhiyun 
834*4882a593Smuzhiyun 	if (info->transport_status != SMBD_CONNECTED) {
835*4882a593Smuzhiyun 		log_outgoing(ERR, "disconnected not sending on wait_credit\n");
836*4882a593Smuzhiyun 		rc = -EAGAIN;
837*4882a593Smuzhiyun 		goto err_wait_credit;
838*4882a593Smuzhiyun 	}
839*4882a593Smuzhiyun 	if (unlikely(atomic_dec_return(&info->send_credits) < 0)) {
840*4882a593Smuzhiyun 		atomic_inc(&info->send_credits);
841*4882a593Smuzhiyun 		goto wait_credit;
842*4882a593Smuzhiyun 	}
843*4882a593Smuzhiyun 
844*4882a593Smuzhiyun wait_send_queue:
845*4882a593Smuzhiyun 	wait_event(info->wait_post_send,
846*4882a593Smuzhiyun 		atomic_read(&info->send_pending) < info->send_credit_target ||
847*4882a593Smuzhiyun 		info->transport_status != SMBD_CONNECTED);
848*4882a593Smuzhiyun 
849*4882a593Smuzhiyun 	if (info->transport_status != SMBD_CONNECTED) {
850*4882a593Smuzhiyun 		log_outgoing(ERR, "disconnected not sending on wait_send_queue\n");
851*4882a593Smuzhiyun 		rc = -EAGAIN;
852*4882a593Smuzhiyun 		goto err_wait_send_queue;
853*4882a593Smuzhiyun 	}
854*4882a593Smuzhiyun 
855*4882a593Smuzhiyun 	if (unlikely(atomic_inc_return(&info->send_pending) >
856*4882a593Smuzhiyun 				info->send_credit_target)) {
857*4882a593Smuzhiyun 		atomic_dec(&info->send_pending);
858*4882a593Smuzhiyun 		goto wait_send_queue;
859*4882a593Smuzhiyun 	}
860*4882a593Smuzhiyun 
861*4882a593Smuzhiyun 	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
862*4882a593Smuzhiyun 	if (!request) {
863*4882a593Smuzhiyun 		rc = -ENOMEM;
864*4882a593Smuzhiyun 		goto err_alloc;
865*4882a593Smuzhiyun 	}
866*4882a593Smuzhiyun 
867*4882a593Smuzhiyun 	request->info = info;
868*4882a593Smuzhiyun 
869*4882a593Smuzhiyun 	/* Fill in the packet header */
870*4882a593Smuzhiyun 	packet = smbd_request_payload(request);
871*4882a593Smuzhiyun 	packet->credits_requested = cpu_to_le16(info->send_credit_target);
872*4882a593Smuzhiyun 
873*4882a593Smuzhiyun 	new_credits = manage_credits_prior_sending(info);
874*4882a593Smuzhiyun 	atomic_add(new_credits, &info->receive_credits);
875*4882a593Smuzhiyun 	packet->credits_granted = cpu_to_le16(new_credits);
876*4882a593Smuzhiyun 
877*4882a593Smuzhiyun 	info->send_immediate = false;
878*4882a593Smuzhiyun 
879*4882a593Smuzhiyun 	packet->flags = 0;
880*4882a593Smuzhiyun 	if (manage_keep_alive_before_sending(info))
881*4882a593Smuzhiyun 		packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
882*4882a593Smuzhiyun 
883*4882a593Smuzhiyun 	packet->reserved = 0;
884*4882a593Smuzhiyun 	if (!data_length)
885*4882a593Smuzhiyun 		packet->data_offset = 0;
886*4882a593Smuzhiyun 	else
887*4882a593Smuzhiyun 		packet->data_offset = cpu_to_le32(24);
888*4882a593Smuzhiyun 	packet->data_length = cpu_to_le32(data_length);
889*4882a593Smuzhiyun 	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
890*4882a593Smuzhiyun 	packet->padding = 0;
891*4882a593Smuzhiyun 
892*4882a593Smuzhiyun 	log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
893*4882a593Smuzhiyun 		     le16_to_cpu(packet->credits_requested),
894*4882a593Smuzhiyun 		     le16_to_cpu(packet->credits_granted),
895*4882a593Smuzhiyun 		     le32_to_cpu(packet->data_offset),
896*4882a593Smuzhiyun 		     le32_to_cpu(packet->data_length),
897*4882a593Smuzhiyun 		     le32_to_cpu(packet->remaining_data_length));
898*4882a593Smuzhiyun 
899*4882a593Smuzhiyun 	/* Map the packet to DMA */
900*4882a593Smuzhiyun 	header_length = sizeof(struct smbd_data_transfer);
901*4882a593Smuzhiyun 	/* If this is a packet without payload, don't send padding */
902*4882a593Smuzhiyun 	if (!data_length)
903*4882a593Smuzhiyun 		header_length = offsetof(struct smbd_data_transfer, padding);
904*4882a593Smuzhiyun 
905*4882a593Smuzhiyun 	request->num_sge = 1;
906*4882a593Smuzhiyun 	request->sge[0].addr = ib_dma_map_single(info->id->device,
907*4882a593Smuzhiyun 						 (void *)packet,
908*4882a593Smuzhiyun 						 header_length,
909*4882a593Smuzhiyun 						 DMA_TO_DEVICE);
910*4882a593Smuzhiyun 	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
911*4882a593Smuzhiyun 		rc = -EIO;
912*4882a593Smuzhiyun 		request->sge[0].addr = 0;
913*4882a593Smuzhiyun 		goto err_dma;
914*4882a593Smuzhiyun 	}
915*4882a593Smuzhiyun 
916*4882a593Smuzhiyun 	request->sge[0].length = header_length;
917*4882a593Smuzhiyun 	request->sge[0].lkey = info->pd->local_dma_lkey;
918*4882a593Smuzhiyun 
919*4882a593Smuzhiyun 	/* Fill in the packet data payload */
920*4882a593Smuzhiyun 	num_sgs = sgl ? sg_nents(sgl) : 0;
921*4882a593Smuzhiyun 	for_each_sg(sgl, sg, num_sgs, i) {
922*4882a593Smuzhiyun 		request->sge[i+1].addr =
923*4882a593Smuzhiyun 			ib_dma_map_page(info->id->device, sg_page(sg),
924*4882a593Smuzhiyun 			       sg->offset, sg->length, DMA_TO_DEVICE);
925*4882a593Smuzhiyun 		if (ib_dma_mapping_error(
926*4882a593Smuzhiyun 				info->id->device, request->sge[i+1].addr)) {
927*4882a593Smuzhiyun 			rc = -EIO;
928*4882a593Smuzhiyun 			request->sge[i+1].addr = 0;
929*4882a593Smuzhiyun 			goto err_dma;
930*4882a593Smuzhiyun 		}
931*4882a593Smuzhiyun 		request->sge[i+1].length = sg->length;
932*4882a593Smuzhiyun 		request->sge[i+1].lkey = info->pd->local_dma_lkey;
933*4882a593Smuzhiyun 		request->num_sge++;
934*4882a593Smuzhiyun 	}
935*4882a593Smuzhiyun 
936*4882a593Smuzhiyun 	rc = smbd_post_send(info, request);
937*4882a593Smuzhiyun 	if (!rc)
938*4882a593Smuzhiyun 		return 0;
939*4882a593Smuzhiyun 
940*4882a593Smuzhiyun err_dma:
941*4882a593Smuzhiyun 	for (i = 0; i < request->num_sge; i++)
942*4882a593Smuzhiyun 		if (request->sge[i].addr)
943*4882a593Smuzhiyun 			ib_dma_unmap_single(info->id->device,
944*4882a593Smuzhiyun 					    request->sge[i].addr,
945*4882a593Smuzhiyun 					    request->sge[i].length,
946*4882a593Smuzhiyun 					    DMA_TO_DEVICE);
947*4882a593Smuzhiyun 	mempool_free(request, info->request_mempool);
948*4882a593Smuzhiyun 
949*4882a593Smuzhiyun 	/* roll back receive credits and credits to be offered */
950*4882a593Smuzhiyun 	spin_lock(&info->lock_new_credits_offered);
951*4882a593Smuzhiyun 	info->new_credits_offered += new_credits;
952*4882a593Smuzhiyun 	spin_unlock(&info->lock_new_credits_offered);
953*4882a593Smuzhiyun 	atomic_sub(new_credits, &info->receive_credits);
954*4882a593Smuzhiyun 
955*4882a593Smuzhiyun err_alloc:
956*4882a593Smuzhiyun 	if (atomic_dec_and_test(&info->send_pending))
957*4882a593Smuzhiyun 		wake_up(&info->wait_send_pending);
958*4882a593Smuzhiyun 
959*4882a593Smuzhiyun err_wait_send_queue:
960*4882a593Smuzhiyun 	/* roll back send credits and pending */
961*4882a593Smuzhiyun 	atomic_inc(&info->send_credits);
962*4882a593Smuzhiyun 
963*4882a593Smuzhiyun err_wait_credit:
964*4882a593Smuzhiyun 	return rc;
965*4882a593Smuzhiyun }
966*4882a593Smuzhiyun 
967*4882a593Smuzhiyun /*
968*4882a593Smuzhiyun  * Send a page
969*4882a593Smuzhiyun  * page: the page to send
970*4882a593Smuzhiyun  * offset: offset in the page to send
971*4882a593Smuzhiyun  * size: length in the page to send
972*4882a593Smuzhiyun  * remaining_data_length: remaining data to send in this payload
973*4882a593Smuzhiyun  */
974*4882a593Smuzhiyun static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
975*4882a593Smuzhiyun 		unsigned long offset, size_t size, int remaining_data_length)
976*4882a593Smuzhiyun {
977*4882a593Smuzhiyun 	struct scatterlist sgl;
978*4882a593Smuzhiyun 
979*4882a593Smuzhiyun 	sg_init_table(&sgl, 1);
980*4882a593Smuzhiyun 	sg_set_page(&sgl, page, size, offset);
981*4882a593Smuzhiyun 
982*4882a593Smuzhiyun 	return smbd_post_send_sgl(info, &sgl, size, remaining_data_length);
983*4882a593Smuzhiyun }
984*4882a593Smuzhiyun 
985*4882a593Smuzhiyun /*
986*4882a593Smuzhiyun  * Send an empty message
987*4882a593Smuzhiyun  * An empty message is used to extend credits to the peer and keep the
988*4882a593Smuzhiyun  * connection alive while there is no upper layer payload to send at the time
989*4882a593Smuzhiyun  */
990*4882a593Smuzhiyun static int smbd_post_send_empty(struct smbd_connection *info)
991*4882a593Smuzhiyun {
992*4882a593Smuzhiyun 	info->count_send_empty++;
993*4882a593Smuzhiyun 	return smbd_post_send_sgl(info, NULL, 0, 0);
994*4882a593Smuzhiyun }
995*4882a593Smuzhiyun 
996*4882a593Smuzhiyun /*
997*4882a593Smuzhiyun  * Send a data buffer
998*4882a593Smuzhiyun  * iov: the iov array describing the data buffers
999*4882a593Smuzhiyun  * n_vec: number of entries in the iov array
1000*4882a593Smuzhiyun  * remaining_data_length: remaining data to send following this packet
1001*4882a593Smuzhiyun  * in segmented SMBD packet
1002*4882a593Smuzhiyun  */
1003*4882a593Smuzhiyun static int smbd_post_send_data(
1004*4882a593Smuzhiyun 	struct smbd_connection *info, struct kvec *iov, int n_vec,
1005*4882a593Smuzhiyun 	int remaining_data_length)
1006*4882a593Smuzhiyun {
1007*4882a593Smuzhiyun 	int i;
1008*4882a593Smuzhiyun 	u32 data_length = 0;
1009*4882a593Smuzhiyun 	struct scatterlist sgl[SMBDIRECT_MAX_SGE];
1010*4882a593Smuzhiyun 
1011*4882a593Smuzhiyun 	if (n_vec > SMBDIRECT_MAX_SGE) {
1012*4882a593Smuzhiyun 		cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
1013*4882a593Smuzhiyun 		return -EINVAL;
1014*4882a593Smuzhiyun 	}
1015*4882a593Smuzhiyun 
1016*4882a593Smuzhiyun 	sg_init_table(sgl, n_vec);
1017*4882a593Smuzhiyun 	for (i = 0; i < n_vec; i++) {
1018*4882a593Smuzhiyun 		data_length += iov[i].iov_len;
1019*4882a593Smuzhiyun 		sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
1020*4882a593Smuzhiyun 	}
1021*4882a593Smuzhiyun 
1022*4882a593Smuzhiyun 	return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
1023*4882a593Smuzhiyun }
1024*4882a593Smuzhiyun 
1025*4882a593Smuzhiyun /*
1026*4882a593Smuzhiyun  * Post a receive request to the transport
1027*4882a593Smuzhiyun  * The remote peer can only send data when a receive request is posted
1028*4882a593Smuzhiyun  * The interaction is controlled by send/receive credit system
1029*4882a593Smuzhiyun  */
1030*4882a593Smuzhiyun static int smbd_post_recv(
1031*4882a593Smuzhiyun 		struct smbd_connection *info, struct smbd_response *response)
1032*4882a593Smuzhiyun {
1033*4882a593Smuzhiyun 	struct ib_recv_wr recv_wr;
1034*4882a593Smuzhiyun 	int rc = -EIO;
1035*4882a593Smuzhiyun 
1036*4882a593Smuzhiyun 	response->sge.addr = ib_dma_map_single(
1037*4882a593Smuzhiyun 				info->id->device, response->packet,
1038*4882a593Smuzhiyun 				info->max_receive_size, DMA_FROM_DEVICE);
1039*4882a593Smuzhiyun 	if (ib_dma_mapping_error(info->id->device, response->sge.addr))
1040*4882a593Smuzhiyun 		return rc;
1041*4882a593Smuzhiyun 
1042*4882a593Smuzhiyun 	response->sge.length = info->max_receive_size;
1043*4882a593Smuzhiyun 	response->sge.lkey = info->pd->local_dma_lkey;
1044*4882a593Smuzhiyun 
1045*4882a593Smuzhiyun 	response->cqe.done = recv_done;
1046*4882a593Smuzhiyun 
1047*4882a593Smuzhiyun 	recv_wr.wr_cqe = &response->cqe;
1048*4882a593Smuzhiyun 	recv_wr.next = NULL;
1049*4882a593Smuzhiyun 	recv_wr.sg_list = &response->sge;
1050*4882a593Smuzhiyun 	recv_wr.num_sge = 1;
1051*4882a593Smuzhiyun 
1052*4882a593Smuzhiyun 	rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
1053*4882a593Smuzhiyun 	if (rc) {
1054*4882a593Smuzhiyun 		ib_dma_unmap_single(info->id->device, response->sge.addr,
1055*4882a593Smuzhiyun 				    response->sge.length, DMA_FROM_DEVICE);
1056*4882a593Smuzhiyun 		smbd_disconnect_rdma_connection(info);
1057*4882a593Smuzhiyun 		log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
1058*4882a593Smuzhiyun 	}
1059*4882a593Smuzhiyun 
1060*4882a593Smuzhiyun 	return rc;
1061*4882a593Smuzhiyun }
1062*4882a593Smuzhiyun 
1063*4882a593Smuzhiyun /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
1064*4882a593Smuzhiyun static int smbd_negotiate(struct smbd_connection *info)
1065*4882a593Smuzhiyun {
1066*4882a593Smuzhiyun 	int rc;
1067*4882a593Smuzhiyun 	struct smbd_response *response = get_receive_buffer(info);
1068*4882a593Smuzhiyun 
1069*4882a593Smuzhiyun 	response->type = SMBD_NEGOTIATE_RESP;
1070*4882a593Smuzhiyun 	rc = smbd_post_recv(info, response);
1071*4882a593Smuzhiyun 	log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=%llx iov.length=%x iov.lkey=%x\n",
1072*4882a593Smuzhiyun 		       rc, response->sge.addr,
1073*4882a593Smuzhiyun 		       response->sge.length, response->sge.lkey);
1074*4882a593Smuzhiyun 	if (rc)
1075*4882a593Smuzhiyun 		return rc;
1076*4882a593Smuzhiyun 
1077*4882a593Smuzhiyun 	init_completion(&info->negotiate_completion);
1078*4882a593Smuzhiyun 	info->negotiate_done = false;
1079*4882a593Smuzhiyun 	rc = smbd_post_send_negotiate_req(info);
1080*4882a593Smuzhiyun 	if (rc)
1081*4882a593Smuzhiyun 		return rc;
1082*4882a593Smuzhiyun 
1083*4882a593Smuzhiyun 	rc = wait_for_completion_interruptible_timeout(
1084*4882a593Smuzhiyun 		&info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ);
1085*4882a593Smuzhiyun 	log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc);
1086*4882a593Smuzhiyun 
1087*4882a593Smuzhiyun 	if (info->negotiate_done)
1088*4882a593Smuzhiyun 		return 0;
1089*4882a593Smuzhiyun 
1090*4882a593Smuzhiyun 	if (rc == 0)
1091*4882a593Smuzhiyun 		rc = -ETIMEDOUT;
1092*4882a593Smuzhiyun 	else if (rc == -ERESTARTSYS)
1093*4882a593Smuzhiyun 		rc = -EINTR;
1094*4882a593Smuzhiyun 	else
1095*4882a593Smuzhiyun 		rc = -ENOTCONN;
1096*4882a593Smuzhiyun 
1097*4882a593Smuzhiyun 	return rc;
1098*4882a593Smuzhiyun }
1099*4882a593Smuzhiyun 
1100*4882a593Smuzhiyun static void put_empty_packet(
1101*4882a593Smuzhiyun 		struct smbd_connection *info, struct smbd_response *response)
1102*4882a593Smuzhiyun {
1103*4882a593Smuzhiyun 	spin_lock(&info->empty_packet_queue_lock);
1104*4882a593Smuzhiyun 	list_add_tail(&response->list, &info->empty_packet_queue);
1105*4882a593Smuzhiyun 	info->count_empty_packet_queue++;
1106*4882a593Smuzhiyun 	spin_unlock(&info->empty_packet_queue_lock);
1107*4882a593Smuzhiyun 
1108*4882a593Smuzhiyun 	queue_work(info->workqueue, &info->post_send_credits_work);
1109*4882a593Smuzhiyun }
1110*4882a593Smuzhiyun 
1111*4882a593Smuzhiyun /*
1112*4882a593Smuzhiyun  * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
1113*4882a593Smuzhiyun  * This is a queue for reassembling upper layer payload and presenting it to
1114*4882a593Smuzhiyun  * the upper layer. All incoming payloads go to the reassembly queue, regardless
1115*4882a593Smuzhiyun  * of whether reassembly is required. The upper layer code reads from the queue
1116*4882a593Smuzhiyun  * for all incoming payloads.
1117*4882a593Smuzhiyun  * Put a received packet to the reassembly queue
1118*4882a593Smuzhiyun  * response: the packet received
1119*4882a593Smuzhiyun  * data_length: the size of payload in this packet
1120*4882a593Smuzhiyun  */
1121*4882a593Smuzhiyun static void enqueue_reassembly(
1122*4882a593Smuzhiyun 	struct smbd_connection *info,
1123*4882a593Smuzhiyun 	struct smbd_response *response,
1124*4882a593Smuzhiyun 	int data_length)
1125*4882a593Smuzhiyun {
1126*4882a593Smuzhiyun 	spin_lock(&info->reassembly_queue_lock);
1127*4882a593Smuzhiyun 	list_add_tail(&response->list, &info->reassembly_queue);
1128*4882a593Smuzhiyun 	info->reassembly_queue_length++;
1129*4882a593Smuzhiyun 	/*
1130*4882a593Smuzhiyun 	 * Make sure reassembly_data_length is updated after list and
1131*4882a593Smuzhiyun 	 * reassembly_queue_length are updated. On the dequeue side
1132*4882a593Smuzhiyun 	 * reassembly_data_length is checked without a lock to determine
1133*4882a593Smuzhiyun 	 * whether reassembly_queue_length and the list are up to date
1134*4882a593Smuzhiyun 	 */
1135*4882a593Smuzhiyun 	virt_wmb();
1136*4882a593Smuzhiyun 	info->reassembly_data_length += data_length;
1137*4882a593Smuzhiyun 	spin_unlock(&info->reassembly_queue_lock);
1138*4882a593Smuzhiyun 	info->count_reassembly_queue++;
1139*4882a593Smuzhiyun 	info->count_enqueue_reassembly_queue++;
1140*4882a593Smuzhiyun }
1141*4882a593Smuzhiyun 
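/*
 * Illustrative sketch (not part of this file): the dequeue-side pairing for
 * the virt_wmb() above. The reader checks reassembly_data_length first, then
 * issues virt_rmb() before trusting reassembly_queue_length and the list
 * itself; smbd_recv_buf() below does exactly this. The helper name
 * reassembly_has_enough() is hypothetical.
 */
#if 0	/* example sketch, not compiled */
static bool reassembly_has_enough(struct smbd_connection *info, int size)
{
	if (info->reassembly_data_length < size)
		return false;

	/* pairs with virt_wmb() in enqueue_reassembly() */
	virt_rmb();
	return info->reassembly_queue_length > 0;
}
#endif
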
1142*4882a593Smuzhiyun /*
1143*4882a593Smuzhiyun  * Get the first entry at the front of reassembly queue
1144*4882a593Smuzhiyun  * Caller is responsible for locking
1145*4882a593Smuzhiyun  * return value: the first entry if any, NULL if queue is empty
1146*4882a593Smuzhiyun  */
1147*4882a593Smuzhiyun static struct smbd_response *_get_first_reassembly(struct smbd_connection *info)
1148*4882a593Smuzhiyun {
1149*4882a593Smuzhiyun 	struct smbd_response *ret = NULL;
1150*4882a593Smuzhiyun 
1151*4882a593Smuzhiyun 	if (!list_empty(&info->reassembly_queue)) {
1152*4882a593Smuzhiyun 		ret = list_first_entry(
1153*4882a593Smuzhiyun 			&info->reassembly_queue,
1154*4882a593Smuzhiyun 			struct smbd_response, list);
1155*4882a593Smuzhiyun 	}
1156*4882a593Smuzhiyun 	return ret;
1157*4882a593Smuzhiyun }
1158*4882a593Smuzhiyun 
1159*4882a593Smuzhiyun static struct smbd_response *get_empty_queue_buffer(
1160*4882a593Smuzhiyun 		struct smbd_connection *info)
1161*4882a593Smuzhiyun {
1162*4882a593Smuzhiyun 	struct smbd_response *ret = NULL;
1163*4882a593Smuzhiyun 	unsigned long flags;
1164*4882a593Smuzhiyun 
1165*4882a593Smuzhiyun 	spin_lock_irqsave(&info->empty_packet_queue_lock, flags);
1166*4882a593Smuzhiyun 	if (!list_empty(&info->empty_packet_queue)) {
1167*4882a593Smuzhiyun 		ret = list_first_entry(
1168*4882a593Smuzhiyun 			&info->empty_packet_queue,
1169*4882a593Smuzhiyun 			struct smbd_response, list);
1170*4882a593Smuzhiyun 		list_del(&ret->list);
1171*4882a593Smuzhiyun 		info->count_empty_packet_queue--;
1172*4882a593Smuzhiyun 	}
1173*4882a593Smuzhiyun 	spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags);
1174*4882a593Smuzhiyun 
1175*4882a593Smuzhiyun 	return ret;
1176*4882a593Smuzhiyun }
1177*4882a593Smuzhiyun 
1178*4882a593Smuzhiyun /*
1179*4882a593Smuzhiyun  * Get a receive buffer
1180*4882a593Smuzhiyun  * For each remote send, we need to post a receive. The receive buffers are
1181*4882a593Smuzhiyun  * pre-allocated.
1182*4882a593Smuzhiyun  * return value: the receive buffer, NULL if none is available
1183*4882a593Smuzhiyun  */
1184*4882a593Smuzhiyun static struct smbd_response *get_receive_buffer(struct smbd_connection *info)
1185*4882a593Smuzhiyun {
1186*4882a593Smuzhiyun 	struct smbd_response *ret = NULL;
1187*4882a593Smuzhiyun 	unsigned long flags;
1188*4882a593Smuzhiyun 
1189*4882a593Smuzhiyun 	spin_lock_irqsave(&info->receive_queue_lock, flags);
1190*4882a593Smuzhiyun 	if (!list_empty(&info->receive_queue)) {
1191*4882a593Smuzhiyun 		ret = list_first_entry(
1192*4882a593Smuzhiyun 			&info->receive_queue,
1193*4882a593Smuzhiyun 			struct smbd_response, list);
1194*4882a593Smuzhiyun 		list_del(&ret->list);
1195*4882a593Smuzhiyun 		info->count_receive_queue--;
1196*4882a593Smuzhiyun 		info->count_get_receive_buffer++;
1197*4882a593Smuzhiyun 	}
1198*4882a593Smuzhiyun 	spin_unlock_irqrestore(&info->receive_queue_lock, flags);
1199*4882a593Smuzhiyun 
1200*4882a593Smuzhiyun 	return ret;
1201*4882a593Smuzhiyun }
1202*4882a593Smuzhiyun 
1203*4882a593Smuzhiyun /*
1204*4882a593Smuzhiyun  * Return a receive buffer
1205*4882a593Smuzhiyun  * Upon return of a receive buffer, we can post a new receive and extend
1206*4882a593Smuzhiyun  * more receive credits to the remote peer. This is done immediately after a
1207*4882a593Smuzhiyun  * receive buffer is returned.
1208*4882a593Smuzhiyun  */
1209*4882a593Smuzhiyun static void put_receive_buffer(
1210*4882a593Smuzhiyun 	struct smbd_connection *info, struct smbd_response *response)
1211*4882a593Smuzhiyun {
1212*4882a593Smuzhiyun 	unsigned long flags;
1213*4882a593Smuzhiyun 
1214*4882a593Smuzhiyun 	ib_dma_unmap_single(info->id->device, response->sge.addr,
1215*4882a593Smuzhiyun 		response->sge.length, DMA_FROM_DEVICE);
1216*4882a593Smuzhiyun 
1217*4882a593Smuzhiyun 	spin_lock_irqsave(&info->receive_queue_lock, flags);
1218*4882a593Smuzhiyun 	list_add_tail(&response->list, &info->receive_queue);
1219*4882a593Smuzhiyun 	info->count_receive_queue++;
1220*4882a593Smuzhiyun 	info->count_put_receive_buffer++;
1221*4882a593Smuzhiyun 	spin_unlock_irqrestore(&info->receive_queue_lock, flags);
1222*4882a593Smuzhiyun 
1223*4882a593Smuzhiyun 	queue_work(info->workqueue, &info->post_send_credits_work);
1224*4882a593Smuzhiyun }
1225*4882a593Smuzhiyun 
1226*4882a593Smuzhiyun /* Preallocate all receive buffers on transport establishment */
1227*4882a593Smuzhiyun static int allocate_receive_buffers(struct smbd_connection *info, int num_buf)
1228*4882a593Smuzhiyun {
1229*4882a593Smuzhiyun 	int i;
1230*4882a593Smuzhiyun 	struct smbd_response *response;
1231*4882a593Smuzhiyun 
1232*4882a593Smuzhiyun 	INIT_LIST_HEAD(&info->reassembly_queue);
1233*4882a593Smuzhiyun 	spin_lock_init(&info->reassembly_queue_lock);
1234*4882a593Smuzhiyun 	info->reassembly_data_length = 0;
1235*4882a593Smuzhiyun 	info->reassembly_queue_length = 0;
1236*4882a593Smuzhiyun 
1237*4882a593Smuzhiyun 	INIT_LIST_HEAD(&info->receive_queue);
1238*4882a593Smuzhiyun 	spin_lock_init(&info->receive_queue_lock);
1239*4882a593Smuzhiyun 	info->count_receive_queue = 0;
1240*4882a593Smuzhiyun 
1241*4882a593Smuzhiyun 	INIT_LIST_HEAD(&info->empty_packet_queue);
1242*4882a593Smuzhiyun 	spin_lock_init(&info->empty_packet_queue_lock);
1243*4882a593Smuzhiyun 	info->count_empty_packet_queue = 0;
1244*4882a593Smuzhiyun 
1245*4882a593Smuzhiyun 	init_waitqueue_head(&info->wait_receive_queues);
1246*4882a593Smuzhiyun 
1247*4882a593Smuzhiyun 	for (i = 0; i < num_buf; i++) {
1248*4882a593Smuzhiyun 		response = mempool_alloc(info->response_mempool, GFP_KERNEL);
1249*4882a593Smuzhiyun 		if (!response)
1250*4882a593Smuzhiyun 			goto allocate_failed;
1251*4882a593Smuzhiyun 
1252*4882a593Smuzhiyun 		response->info = info;
1253*4882a593Smuzhiyun 		list_add_tail(&response->list, &info->receive_queue);
1254*4882a593Smuzhiyun 		info->count_receive_queue++;
1255*4882a593Smuzhiyun 	}
1256*4882a593Smuzhiyun 
1257*4882a593Smuzhiyun 	return 0;
1258*4882a593Smuzhiyun 
1259*4882a593Smuzhiyun allocate_failed:
1260*4882a593Smuzhiyun 	while (!list_empty(&info->receive_queue)) {
1261*4882a593Smuzhiyun 		response = list_first_entry(
1262*4882a593Smuzhiyun 				&info->receive_queue,
1263*4882a593Smuzhiyun 				struct smbd_response, list);
1264*4882a593Smuzhiyun 		list_del(&response->list);
1265*4882a593Smuzhiyun 		info->count_receive_queue--;
1266*4882a593Smuzhiyun 
1267*4882a593Smuzhiyun 		mempool_free(response, info->response_mempool);
1268*4882a593Smuzhiyun 	}
1269*4882a593Smuzhiyun 	return -ENOMEM;
1270*4882a593Smuzhiyun }
1271*4882a593Smuzhiyun 
1272*4882a593Smuzhiyun static void destroy_receive_buffers(struct smbd_connection *info)
1273*4882a593Smuzhiyun {
1274*4882a593Smuzhiyun 	struct smbd_response *response;
1275*4882a593Smuzhiyun 
1276*4882a593Smuzhiyun 	while ((response = get_receive_buffer(info)))
1277*4882a593Smuzhiyun 		mempool_free(response, info->response_mempool);
1278*4882a593Smuzhiyun 
1279*4882a593Smuzhiyun 	while ((response = get_empty_queue_buffer(info)))
1280*4882a593Smuzhiyun 		mempool_free(response, info->response_mempool);
1281*4882a593Smuzhiyun }
1282*4882a593Smuzhiyun 
1283*4882a593Smuzhiyun /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
1284*4882a593Smuzhiyun static void idle_connection_timer(struct work_struct *work)
1285*4882a593Smuzhiyun {
1286*4882a593Smuzhiyun 	struct smbd_connection *info = container_of(
1287*4882a593Smuzhiyun 					work, struct smbd_connection,
1288*4882a593Smuzhiyun 					idle_timer_work.work);
1289*4882a593Smuzhiyun 
1290*4882a593Smuzhiyun 	if (info->keep_alive_requested != KEEP_ALIVE_NONE) {
1291*4882a593Smuzhiyun 		log_keep_alive(ERR,
1292*4882a593Smuzhiyun 			"error status info->keep_alive_requested=%d\n",
1293*4882a593Smuzhiyun 			info->keep_alive_requested);
1294*4882a593Smuzhiyun 		smbd_disconnect_rdma_connection(info);
1295*4882a593Smuzhiyun 		return;
1296*4882a593Smuzhiyun 	}
1297*4882a593Smuzhiyun 
1298*4882a593Smuzhiyun 	log_keep_alive(INFO, "about to send an empty idle message\n");
1299*4882a593Smuzhiyun 	smbd_post_send_empty(info);
1300*4882a593Smuzhiyun 
1301*4882a593Smuzhiyun 	/* Setup the next idle timeout work */
1302*4882a593Smuzhiyun 	queue_delayed_work(info->workqueue, &info->idle_timer_work,
1303*4882a593Smuzhiyun 			info->keep_alive_interval*HZ);
1304*4882a593Smuzhiyun }
1305*4882a593Smuzhiyun 
1306*4882a593Smuzhiyun /*
1307*4882a593Smuzhiyun  * Destroy the transport and related RDMA and memory resources
1308*4882a593Smuzhiyun  * Need to go through all the pending counters and make sure no one is using
1309*4882a593Smuzhiyun  * the transport while it is being destroyed
1310*4882a593Smuzhiyun  */
1311*4882a593Smuzhiyun void smbd_destroy(struct TCP_Server_Info *server)
1312*4882a593Smuzhiyun {
1313*4882a593Smuzhiyun 	struct smbd_connection *info = server->smbd_conn;
1314*4882a593Smuzhiyun 	struct smbd_response *response;
1315*4882a593Smuzhiyun 	unsigned long flags;
1316*4882a593Smuzhiyun 
1317*4882a593Smuzhiyun 	if (!info) {
1318*4882a593Smuzhiyun 		log_rdma_event(INFO, "rdma session already destroyed\n");
1319*4882a593Smuzhiyun 		return;
1320*4882a593Smuzhiyun 	}
1321*4882a593Smuzhiyun 
1322*4882a593Smuzhiyun 	log_rdma_event(INFO, "destroying rdma session\n");
1323*4882a593Smuzhiyun 	if (info->transport_status != SMBD_DISCONNECTED) {
1324*4882a593Smuzhiyun 		rdma_disconnect(server->smbd_conn->id);
1325*4882a593Smuzhiyun 		log_rdma_event(INFO, "wait for transport being disconnected\n");
1326*4882a593Smuzhiyun 		wait_event_interruptible(
1327*4882a593Smuzhiyun 			info->disconn_wait,
1328*4882a593Smuzhiyun 			info->transport_status == SMBD_DISCONNECTED);
1329*4882a593Smuzhiyun 	}
1330*4882a593Smuzhiyun 
1331*4882a593Smuzhiyun 	log_rdma_event(INFO, "destroying qp\n");
1332*4882a593Smuzhiyun 	ib_drain_qp(info->id->qp);
1333*4882a593Smuzhiyun 	rdma_destroy_qp(info->id);
1334*4882a593Smuzhiyun 
1335*4882a593Smuzhiyun 	log_rdma_event(INFO, "cancelling idle timer\n");
1336*4882a593Smuzhiyun 	cancel_delayed_work_sync(&info->idle_timer_work);
1337*4882a593Smuzhiyun 
1338*4882a593Smuzhiyun 	log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
1339*4882a593Smuzhiyun 	wait_event(info->wait_send_pending,
1340*4882a593Smuzhiyun 		atomic_read(&info->send_pending) == 0);
1341*4882a593Smuzhiyun 
1342*4882a593Smuzhiyun 	/* It's not possible for the upper layer to get to reassembly */
1343*4882a593Smuzhiyun 	log_rdma_event(INFO, "drain the reassembly queue\n");
1344*4882a593Smuzhiyun 	do {
1345*4882a593Smuzhiyun 		spin_lock_irqsave(&info->reassembly_queue_lock, flags);
1346*4882a593Smuzhiyun 		response = _get_first_reassembly(info);
1347*4882a593Smuzhiyun 		if (response) {
1348*4882a593Smuzhiyun 			list_del(&response->list);
1349*4882a593Smuzhiyun 			spin_unlock_irqrestore(
1350*4882a593Smuzhiyun 				&info->reassembly_queue_lock, flags);
1351*4882a593Smuzhiyun 			put_receive_buffer(info, response);
1352*4882a593Smuzhiyun 		} else
1353*4882a593Smuzhiyun 			spin_unlock_irqrestore(
1354*4882a593Smuzhiyun 				&info->reassembly_queue_lock, flags);
1355*4882a593Smuzhiyun 	} while (response);
1356*4882a593Smuzhiyun 	info->reassembly_data_length = 0;
1357*4882a593Smuzhiyun 
1358*4882a593Smuzhiyun 	log_rdma_event(INFO, "free receive buffers\n");
1359*4882a593Smuzhiyun 	wait_event(info->wait_receive_queues,
1360*4882a593Smuzhiyun 		info->count_receive_queue + info->count_empty_packet_queue
1361*4882a593Smuzhiyun 			== info->receive_credit_max);
1362*4882a593Smuzhiyun 	destroy_receive_buffers(info);
1363*4882a593Smuzhiyun 
1364*4882a593Smuzhiyun 	/*
1365*4882a593Smuzhiyun 	 * For performance reasons, memory registration and deregistration
1366*4882a593Smuzhiyun 	 * are not locked by srv_mutex. It is possible some processes are
1367*4882a593Smuzhiyun 	 * blocked on transport srv_mutex while holding memory registration.
1368*4882a593Smuzhiyun 	 * Release the transport srv_mutex to allow them to hit the failure
1369*4882a593Smuzhiyun 	 * path when sending data, and then release memory registrations.
1370*4882a593Smuzhiyun 	 */
1371*4882a593Smuzhiyun 	log_rdma_event(INFO, "freeing mr list\n");
1372*4882a593Smuzhiyun 	wake_up_interruptible_all(&info->wait_mr);
1373*4882a593Smuzhiyun 	while (atomic_read(&info->mr_used_count)) {
1374*4882a593Smuzhiyun 		mutex_unlock(&server->srv_mutex);
1375*4882a593Smuzhiyun 		msleep(1000);
1376*4882a593Smuzhiyun 		mutex_lock(&server->srv_mutex);
1377*4882a593Smuzhiyun 	}
1378*4882a593Smuzhiyun 	destroy_mr_list(info);
1379*4882a593Smuzhiyun 
1380*4882a593Smuzhiyun 	ib_free_cq(info->send_cq);
1381*4882a593Smuzhiyun 	ib_free_cq(info->recv_cq);
1382*4882a593Smuzhiyun 	ib_dealloc_pd(info->pd);
1383*4882a593Smuzhiyun 	rdma_destroy_id(info->id);
1384*4882a593Smuzhiyun 
1385*4882a593Smuzhiyun 	/* free mempools */
1386*4882a593Smuzhiyun 	mempool_destroy(info->request_mempool);
1387*4882a593Smuzhiyun 	kmem_cache_destroy(info->request_cache);
1388*4882a593Smuzhiyun 
1389*4882a593Smuzhiyun 	mempool_destroy(info->response_mempool);
1390*4882a593Smuzhiyun 	kmem_cache_destroy(info->response_cache);
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun 	info->transport_status = SMBD_DESTROYED;
1393*4882a593Smuzhiyun 
1394*4882a593Smuzhiyun 	destroy_workqueue(info->workqueue);
1395*4882a593Smuzhiyun 	log_rdma_event(INFO,  "rdma session destroyed\n");
1396*4882a593Smuzhiyun 	kfree(info);
1397*4882a593Smuzhiyun }
1398*4882a593Smuzhiyun 
1399*4882a593Smuzhiyun /*
1400*4882a593Smuzhiyun  * Reconnect this SMBD connection, called from upper layer
1401*4882a593Smuzhiyun  * return value: 0 on success, or actual error code
1402*4882a593Smuzhiyun  */
1403*4882a593Smuzhiyun int smbd_reconnect(struct TCP_Server_Info *server)
1404*4882a593Smuzhiyun {
1405*4882a593Smuzhiyun 	log_rdma_event(INFO, "reconnecting rdma session\n");
1406*4882a593Smuzhiyun 
1407*4882a593Smuzhiyun 	if (!server->smbd_conn) {
1408*4882a593Smuzhiyun 		log_rdma_event(INFO, "rdma session already destroyed\n");
1409*4882a593Smuzhiyun 		goto create_conn;
1410*4882a593Smuzhiyun 	}
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun 	/*
1413*4882a593Smuzhiyun 	 * This is possible if the transport is disconnected and we haven't
1414*4882a593Smuzhiyun 	 * received a notification from RDMA, but the upper layer has detected a timeout
1415*4882a593Smuzhiyun 	 */
1416*4882a593Smuzhiyun 	if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
1417*4882a593Smuzhiyun 		log_rdma_event(INFO, "disconnecting transport\n");
1418*4882a593Smuzhiyun 		smbd_destroy(server);
1419*4882a593Smuzhiyun 	}
1420*4882a593Smuzhiyun 
1421*4882a593Smuzhiyun create_conn:
1422*4882a593Smuzhiyun 	log_rdma_event(INFO, "creating rdma session\n");
1423*4882a593Smuzhiyun 	server->smbd_conn = smbd_get_connection(
1424*4882a593Smuzhiyun 		server, (struct sockaddr *) &server->dstaddr);
1425*4882a593Smuzhiyun 
1426*4882a593Smuzhiyun 	if (server->smbd_conn)
1427*4882a593Smuzhiyun 		cifs_dbg(VFS, "RDMA transport re-established\n");
1428*4882a593Smuzhiyun 
1429*4882a593Smuzhiyun 	return server->smbd_conn ? 0 : -ENOENT;
1430*4882a593Smuzhiyun }
1431*4882a593Smuzhiyun 
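/*
 * Illustrative sketch (not part of this file): a caller of smbd_reconnect()
 * as the upper layer might use it when server->rdma is set. The surrounding
 * reconnect logic is simplified and assumed for this example only.
 */
#if 0	/* example sketch, not compiled */
	if (server->rdma) {
		rc = smbd_reconnect(server);
		if (rc)
			cifs_dbg(VFS, "smbd_reconnect failed rc=%d\n", rc);
	}
#endif
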
1432*4882a593Smuzhiyun static void destroy_caches_and_workqueue(struct smbd_connection *info)
1433*4882a593Smuzhiyun {
1434*4882a593Smuzhiyun 	destroy_receive_buffers(info);
1435*4882a593Smuzhiyun 	destroy_workqueue(info->workqueue);
1436*4882a593Smuzhiyun 	mempool_destroy(info->response_mempool);
1437*4882a593Smuzhiyun 	kmem_cache_destroy(info->response_cache);
1438*4882a593Smuzhiyun 	mempool_destroy(info->request_mempool);
1439*4882a593Smuzhiyun 	kmem_cache_destroy(info->request_cache);
1440*4882a593Smuzhiyun }
1441*4882a593Smuzhiyun 
1442*4882a593Smuzhiyun #define MAX_NAME_LEN	80
1443*4882a593Smuzhiyun static int allocate_caches_and_workqueue(struct smbd_connection *info)
1444*4882a593Smuzhiyun {
1445*4882a593Smuzhiyun 	char name[MAX_NAME_LEN];
1446*4882a593Smuzhiyun 	int rc;
1447*4882a593Smuzhiyun 
1448*4882a593Smuzhiyun 	scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
1449*4882a593Smuzhiyun 	info->request_cache =
1450*4882a593Smuzhiyun 		kmem_cache_create(
1451*4882a593Smuzhiyun 			name,
1452*4882a593Smuzhiyun 			sizeof(struct smbd_request) +
1453*4882a593Smuzhiyun 				sizeof(struct smbd_data_transfer),
1454*4882a593Smuzhiyun 			0, SLAB_HWCACHE_ALIGN, NULL);
1455*4882a593Smuzhiyun 	if (!info->request_cache)
1456*4882a593Smuzhiyun 		return -ENOMEM;
1457*4882a593Smuzhiyun 
1458*4882a593Smuzhiyun 	info->request_mempool =
1459*4882a593Smuzhiyun 		mempool_create(info->send_credit_target, mempool_alloc_slab,
1460*4882a593Smuzhiyun 			mempool_free_slab, info->request_cache);
1461*4882a593Smuzhiyun 	if (!info->request_mempool)
1462*4882a593Smuzhiyun 		goto out1;
1463*4882a593Smuzhiyun 
1464*4882a593Smuzhiyun 	scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
1465*4882a593Smuzhiyun 	info->response_cache =
1466*4882a593Smuzhiyun 		kmem_cache_create(
1467*4882a593Smuzhiyun 			name,
1468*4882a593Smuzhiyun 			sizeof(struct smbd_response) +
1469*4882a593Smuzhiyun 				info->max_receive_size,
1470*4882a593Smuzhiyun 			0, SLAB_HWCACHE_ALIGN, NULL);
1471*4882a593Smuzhiyun 	if (!info->response_cache)
1472*4882a593Smuzhiyun 		goto out2;
1473*4882a593Smuzhiyun 
1474*4882a593Smuzhiyun 	info->response_mempool =
1475*4882a593Smuzhiyun 		mempool_create(info->receive_credit_max, mempool_alloc_slab,
1476*4882a593Smuzhiyun 		       mempool_free_slab, info->response_cache);
1477*4882a593Smuzhiyun 	if (!info->response_mempool)
1478*4882a593Smuzhiyun 		goto out3;
1479*4882a593Smuzhiyun 
1480*4882a593Smuzhiyun 	scnprintf(name, MAX_NAME_LEN, "smbd_%p", info);
1481*4882a593Smuzhiyun 	info->workqueue = create_workqueue(name);
1482*4882a593Smuzhiyun 	if (!info->workqueue)
1483*4882a593Smuzhiyun 		goto out4;
1484*4882a593Smuzhiyun 
1485*4882a593Smuzhiyun 	rc = allocate_receive_buffers(info, info->receive_credit_max);
1486*4882a593Smuzhiyun 	if (rc) {
1487*4882a593Smuzhiyun 		log_rdma_event(ERR, "failed to allocate receive buffers\n");
1488*4882a593Smuzhiyun 		goto out5;
1489*4882a593Smuzhiyun 	}
1490*4882a593Smuzhiyun 
1491*4882a593Smuzhiyun 	return 0;
1492*4882a593Smuzhiyun 
1493*4882a593Smuzhiyun out5:
1494*4882a593Smuzhiyun 	destroy_workqueue(info->workqueue);
1495*4882a593Smuzhiyun out4:
1496*4882a593Smuzhiyun 	mempool_destroy(info->response_mempool);
1497*4882a593Smuzhiyun out3:
1498*4882a593Smuzhiyun 	kmem_cache_destroy(info->response_cache);
1499*4882a593Smuzhiyun out2:
1500*4882a593Smuzhiyun 	mempool_destroy(info->request_mempool);
1501*4882a593Smuzhiyun out1:
1502*4882a593Smuzhiyun 	kmem_cache_destroy(info->request_cache);
1503*4882a593Smuzhiyun 	return -ENOMEM;
1504*4882a593Smuzhiyun }
1505*4882a593Smuzhiyun 
1506*4882a593Smuzhiyun /* Create a SMBD connection, called by upper layer */
1507*4882a593Smuzhiyun static struct smbd_connection *_smbd_get_connection(
1508*4882a593Smuzhiyun 	struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port)
1509*4882a593Smuzhiyun {
1510*4882a593Smuzhiyun 	int rc;
1511*4882a593Smuzhiyun 	struct smbd_connection *info;
1512*4882a593Smuzhiyun 	struct rdma_conn_param conn_param;
1513*4882a593Smuzhiyun 	struct ib_qp_init_attr qp_attr;
1514*4882a593Smuzhiyun 	struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
1515*4882a593Smuzhiyun 	struct ib_port_immutable port_immutable;
1516*4882a593Smuzhiyun 	u32 ird_ord_hdr[2];
1517*4882a593Smuzhiyun 
1518*4882a593Smuzhiyun 	info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL);
1519*4882a593Smuzhiyun 	if (!info)
1520*4882a593Smuzhiyun 		return NULL;
1521*4882a593Smuzhiyun 
1522*4882a593Smuzhiyun 	info->transport_status = SMBD_CONNECTING;
1523*4882a593Smuzhiyun 	rc = smbd_ia_open(info, dstaddr, port);
1524*4882a593Smuzhiyun 	if (rc) {
1525*4882a593Smuzhiyun 		log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc);
1526*4882a593Smuzhiyun 		goto create_id_failed;
1527*4882a593Smuzhiyun 	}
1528*4882a593Smuzhiyun 
1529*4882a593Smuzhiyun 	if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
1530*4882a593Smuzhiyun 	    smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
1531*4882a593Smuzhiyun 		log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1532*4882a593Smuzhiyun 			       smbd_send_credit_target,
1533*4882a593Smuzhiyun 			       info->id->device->attrs.max_cqe,
1534*4882a593Smuzhiyun 			       info->id->device->attrs.max_qp_wr);
1535*4882a593Smuzhiyun 		goto config_failed;
1536*4882a593Smuzhiyun 	}
1537*4882a593Smuzhiyun 
1538*4882a593Smuzhiyun 	if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
1539*4882a593Smuzhiyun 	    smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
1540*4882a593Smuzhiyun 		log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1541*4882a593Smuzhiyun 			       smbd_receive_credit_max,
1542*4882a593Smuzhiyun 			       info->id->device->attrs.max_cqe,
1543*4882a593Smuzhiyun 			       info->id->device->attrs.max_qp_wr);
1544*4882a593Smuzhiyun 		goto config_failed;
1545*4882a593Smuzhiyun 	}
1546*4882a593Smuzhiyun 
1547*4882a593Smuzhiyun 	info->receive_credit_max = smbd_receive_credit_max;
1548*4882a593Smuzhiyun 	info->send_credit_target = smbd_send_credit_target;
1549*4882a593Smuzhiyun 	info->max_send_size = smbd_max_send_size;
1550*4882a593Smuzhiyun 	info->max_fragmented_recv_size = smbd_max_fragmented_recv_size;
1551*4882a593Smuzhiyun 	info->max_receive_size = smbd_max_receive_size;
1552*4882a593Smuzhiyun 	info->keep_alive_interval = smbd_keep_alive_interval;
1553*4882a593Smuzhiyun 
1554*4882a593Smuzhiyun 	if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
1555*4882a593Smuzhiyun 		log_rdma_event(ERR,
1556*4882a593Smuzhiyun 			"warning: device max_send_sge = %d too small\n",
1557*4882a593Smuzhiyun 			info->id->device->attrs.max_send_sge);
1558*4882a593Smuzhiyun 		log_rdma_event(ERR, "Queue Pair creation may fail\n");
1559*4882a593Smuzhiyun 	}
1560*4882a593Smuzhiyun 	if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
1561*4882a593Smuzhiyun 		log_rdma_event(ERR,
1562*4882a593Smuzhiyun 			"warning: device max_recv_sge = %d too small\n",
1563*4882a593Smuzhiyun 			info->id->device->attrs.max_recv_sge);
1564*4882a593Smuzhiyun 		log_rdma_event(ERR, "Queue Pair creation may fail\n");
1565*4882a593Smuzhiyun 	}
1566*4882a593Smuzhiyun 
1567*4882a593Smuzhiyun 	info->send_cq = NULL;
1568*4882a593Smuzhiyun 	info->recv_cq = NULL;
1569*4882a593Smuzhiyun 	info->send_cq =
1570*4882a593Smuzhiyun 		ib_alloc_cq_any(info->id->device, info,
1571*4882a593Smuzhiyun 				info->send_credit_target, IB_POLL_SOFTIRQ);
1572*4882a593Smuzhiyun 	if (IS_ERR(info->send_cq)) {
1573*4882a593Smuzhiyun 		info->send_cq = NULL;
1574*4882a593Smuzhiyun 		goto alloc_cq_failed;
1575*4882a593Smuzhiyun 	}
1576*4882a593Smuzhiyun 
1577*4882a593Smuzhiyun 	info->recv_cq =
1578*4882a593Smuzhiyun 		ib_alloc_cq_any(info->id->device, info,
1579*4882a593Smuzhiyun 				info->receive_credit_max, IB_POLL_SOFTIRQ);
1580*4882a593Smuzhiyun 	if (IS_ERR(info->recv_cq)) {
1581*4882a593Smuzhiyun 		info->recv_cq = NULL;
1582*4882a593Smuzhiyun 		goto alloc_cq_failed;
1583*4882a593Smuzhiyun 	}
1584*4882a593Smuzhiyun 
1585*4882a593Smuzhiyun 	memset(&qp_attr, 0, sizeof(qp_attr));
1586*4882a593Smuzhiyun 	qp_attr.event_handler = smbd_qp_async_error_upcall;
1587*4882a593Smuzhiyun 	qp_attr.qp_context = info;
1588*4882a593Smuzhiyun 	qp_attr.cap.max_send_wr = info->send_credit_target;
1589*4882a593Smuzhiyun 	qp_attr.cap.max_recv_wr = info->receive_credit_max;
1590*4882a593Smuzhiyun 	qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE;
1591*4882a593Smuzhiyun 	qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE;
1592*4882a593Smuzhiyun 	qp_attr.cap.max_inline_data = 0;
1593*4882a593Smuzhiyun 	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
1594*4882a593Smuzhiyun 	qp_attr.qp_type = IB_QPT_RC;
1595*4882a593Smuzhiyun 	qp_attr.send_cq = info->send_cq;
1596*4882a593Smuzhiyun 	qp_attr.recv_cq = info->recv_cq;
1597*4882a593Smuzhiyun 	qp_attr.port_num = ~0;
1598*4882a593Smuzhiyun 
1599*4882a593Smuzhiyun 	rc = rdma_create_qp(info->id, info->pd, &qp_attr);
1600*4882a593Smuzhiyun 	if (rc) {
1601*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc);
1602*4882a593Smuzhiyun 		goto create_qp_failed;
1603*4882a593Smuzhiyun 	}
1604*4882a593Smuzhiyun 
1605*4882a593Smuzhiyun 	memset(&conn_param, 0, sizeof(conn_param));
1606*4882a593Smuzhiyun 	conn_param.initiator_depth = 0;
1607*4882a593Smuzhiyun 
1608*4882a593Smuzhiyun 	conn_param.responder_resources =
1609*4882a593Smuzhiyun 		info->id->device->attrs.max_qp_rd_atom
1610*4882a593Smuzhiyun 			< SMBD_CM_RESPONDER_RESOURCES ?
1611*4882a593Smuzhiyun 		info->id->device->attrs.max_qp_rd_atom :
1612*4882a593Smuzhiyun 		SMBD_CM_RESPONDER_RESOURCES;
1613*4882a593Smuzhiyun 	info->responder_resources = conn_param.responder_resources;
1614*4882a593Smuzhiyun 	log_rdma_mr(INFO, "responder_resources=%d\n",
1615*4882a593Smuzhiyun 		info->responder_resources);
1616*4882a593Smuzhiyun 
1617*4882a593Smuzhiyun 	/* Need to send IRD/ORD in private data for iWARP */
1618*4882a593Smuzhiyun 	info->id->device->ops.get_port_immutable(
1619*4882a593Smuzhiyun 		info->id->device, info->id->port_num, &port_immutable);
1620*4882a593Smuzhiyun 	if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
1621*4882a593Smuzhiyun 		ird_ord_hdr[0] = info->responder_resources;
1622*4882a593Smuzhiyun 		ird_ord_hdr[1] = 1;
1623*4882a593Smuzhiyun 		conn_param.private_data = ird_ord_hdr;
1624*4882a593Smuzhiyun 		conn_param.private_data_len = sizeof(ird_ord_hdr);
1625*4882a593Smuzhiyun 	} else {
1626*4882a593Smuzhiyun 		conn_param.private_data = NULL;
1627*4882a593Smuzhiyun 		conn_param.private_data_len = 0;
1628*4882a593Smuzhiyun 	}
1629*4882a593Smuzhiyun 
1630*4882a593Smuzhiyun 	conn_param.retry_count = SMBD_CM_RETRY;
1631*4882a593Smuzhiyun 	conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY;
1632*4882a593Smuzhiyun 	conn_param.flow_control = 0;
1633*4882a593Smuzhiyun 
1634*4882a593Smuzhiyun 	log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
1635*4882a593Smuzhiyun 		&addr_in->sin_addr, port);
1636*4882a593Smuzhiyun 
1637*4882a593Smuzhiyun 	init_waitqueue_head(&info->conn_wait);
1638*4882a593Smuzhiyun 	init_waitqueue_head(&info->disconn_wait);
1639*4882a593Smuzhiyun 	init_waitqueue_head(&info->wait_reassembly_queue);
1640*4882a593Smuzhiyun 	rc = rdma_connect(info->id, &conn_param);
1641*4882a593Smuzhiyun 	if (rc) {
1642*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
1643*4882a593Smuzhiyun 		goto rdma_connect_failed;
1644*4882a593Smuzhiyun 	}
1645*4882a593Smuzhiyun 
1646*4882a593Smuzhiyun 	wait_event_interruptible(
1647*4882a593Smuzhiyun 		info->conn_wait, info->transport_status != SMBD_CONNECTING);
1648*4882a593Smuzhiyun 
1649*4882a593Smuzhiyun 	if (info->transport_status != SMBD_CONNECTED) {
1650*4882a593Smuzhiyun 		log_rdma_event(ERR, "rdma_connect failed port=%d\n", port);
1651*4882a593Smuzhiyun 		goto rdma_connect_failed;
1652*4882a593Smuzhiyun 	}
1653*4882a593Smuzhiyun 
1654*4882a593Smuzhiyun 	log_rdma_event(INFO, "rdma_connect connected\n");
1655*4882a593Smuzhiyun 
1656*4882a593Smuzhiyun 	rc = allocate_caches_and_workqueue(info);
1657*4882a593Smuzhiyun 	if (rc) {
1658*4882a593Smuzhiyun 		log_rdma_event(ERR, "cache allocation failed\n");
1659*4882a593Smuzhiyun 		goto allocate_cache_failed;
1660*4882a593Smuzhiyun 	}
1661*4882a593Smuzhiyun 
1662*4882a593Smuzhiyun 	init_waitqueue_head(&info->wait_send_queue);
1663*4882a593Smuzhiyun 	INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
1664*4882a593Smuzhiyun 	queue_delayed_work(info->workqueue, &info->idle_timer_work,
1665*4882a593Smuzhiyun 		info->keep_alive_interval*HZ);
1666*4882a593Smuzhiyun 
1667*4882a593Smuzhiyun 	init_waitqueue_head(&info->wait_send_pending);
1668*4882a593Smuzhiyun 	atomic_set(&info->send_pending, 0);
1669*4882a593Smuzhiyun 
1670*4882a593Smuzhiyun 	init_waitqueue_head(&info->wait_post_send);
1671*4882a593Smuzhiyun 
1672*4882a593Smuzhiyun 	INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work);
1673*4882a593Smuzhiyun 	INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits);
1674*4882a593Smuzhiyun 	info->new_credits_offered = 0;
1675*4882a593Smuzhiyun 	spin_lock_init(&info->lock_new_credits_offered);
1676*4882a593Smuzhiyun 
1677*4882a593Smuzhiyun 	rc = smbd_negotiate(info);
1678*4882a593Smuzhiyun 	if (rc) {
1679*4882a593Smuzhiyun 		log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc);
1680*4882a593Smuzhiyun 		goto negotiation_failed;
1681*4882a593Smuzhiyun 	}
1682*4882a593Smuzhiyun 
1683*4882a593Smuzhiyun 	rc = allocate_mr_list(info);
1684*4882a593Smuzhiyun 	if (rc) {
1685*4882a593Smuzhiyun 		log_rdma_mr(ERR, "memory registration allocation failed\n");
1686*4882a593Smuzhiyun 		goto allocate_mr_failed;
1687*4882a593Smuzhiyun 	}
1688*4882a593Smuzhiyun 
1689*4882a593Smuzhiyun 	return info;
1690*4882a593Smuzhiyun 
1691*4882a593Smuzhiyun allocate_mr_failed:
1692*4882a593Smuzhiyun 	/* At this point, we need a full transport shutdown */
1693*4882a593Smuzhiyun 	smbd_destroy(server);
1694*4882a593Smuzhiyun 	return NULL;
1695*4882a593Smuzhiyun 
1696*4882a593Smuzhiyun negotiation_failed:
1697*4882a593Smuzhiyun 	cancel_delayed_work_sync(&info->idle_timer_work);
1698*4882a593Smuzhiyun 	destroy_caches_and_workqueue(info);
1699*4882a593Smuzhiyun 	info->transport_status = SMBD_NEGOTIATE_FAILED;
1700*4882a593Smuzhiyun 	init_waitqueue_head(&info->conn_wait);
1701*4882a593Smuzhiyun 	rdma_disconnect(info->id);
1702*4882a593Smuzhiyun 	wait_event(info->conn_wait,
1703*4882a593Smuzhiyun 		info->transport_status == SMBD_DISCONNECTED);
1704*4882a593Smuzhiyun 
1705*4882a593Smuzhiyun allocate_cache_failed:
1706*4882a593Smuzhiyun rdma_connect_failed:
1707*4882a593Smuzhiyun 	rdma_destroy_qp(info->id);
1708*4882a593Smuzhiyun 
1709*4882a593Smuzhiyun create_qp_failed:
1710*4882a593Smuzhiyun alloc_cq_failed:
1711*4882a593Smuzhiyun 	if (info->send_cq)
1712*4882a593Smuzhiyun 		ib_free_cq(info->send_cq);
1713*4882a593Smuzhiyun 	if (info->recv_cq)
1714*4882a593Smuzhiyun 		ib_free_cq(info->recv_cq);
1715*4882a593Smuzhiyun 
1716*4882a593Smuzhiyun config_failed:
1717*4882a593Smuzhiyun 	ib_dealloc_pd(info->pd);
1718*4882a593Smuzhiyun 	rdma_destroy_id(info->id);
1719*4882a593Smuzhiyun 
1720*4882a593Smuzhiyun create_id_failed:
1721*4882a593Smuzhiyun 	kfree(info);
1722*4882a593Smuzhiyun 	return NULL;
1723*4882a593Smuzhiyun }
1724*4882a593Smuzhiyun 
1725*4882a593Smuzhiyun struct smbd_connection *smbd_get_connection(
1726*4882a593Smuzhiyun 	struct TCP_Server_Info *server, struct sockaddr *dstaddr)
1727*4882a593Smuzhiyun {
1728*4882a593Smuzhiyun 	struct smbd_connection *ret;
1729*4882a593Smuzhiyun 	int port = SMBD_PORT;
1730*4882a593Smuzhiyun 
1731*4882a593Smuzhiyun try_again:
1732*4882a593Smuzhiyun 	ret = _smbd_get_connection(server, dstaddr, port);
1733*4882a593Smuzhiyun 
1734*4882a593Smuzhiyun 	/* Try SMB_PORT if SMBD_PORT doesn't work */
1735*4882a593Smuzhiyun 	if (!ret && port == SMBD_PORT) {
1736*4882a593Smuzhiyun 		port = SMB_PORT;
1737*4882a593Smuzhiyun 		goto try_again;
1738*4882a593Smuzhiyun 	}
1739*4882a593Smuzhiyun 	return ret;
1740*4882a593Smuzhiyun }
1741*4882a593Smuzhiyun 
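/*
 * Illustrative sketch (not part of this file): how the upper layer typically
 * establishes the SMBD transport at mount time. Variable names and the error
 * handling shown here are assumptions for this example only.
 */
#if 0	/* example sketch, not compiled */
	server->smbd_conn = smbd_get_connection(
		server, (struct sockaddr *)&server->dstaddr);
	if (!server->smbd_conn) {
		cifs_dbg(VFS, "RDMA transport setup failed\n");
		return -ENOENT;
	}
#endif
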
1742*4882a593Smuzhiyun /*
1743*4882a593Smuzhiyun  * Receive data from receive reassembly queue
1744*4882a593Smuzhiyun  * All the incoming data packets are placed in reassembly queue
1745*4882a593Smuzhiyun  * buf: the buffer to read data into
1746*4882a593Smuzhiyun  * size: the length of data to read
1747*4882a593Smuzhiyun  * return value: actual data read
1748*4882a593Smuzhiyun  * Note: this implementation copies the data from the reassembly queue to the
1749*4882a593Smuzhiyun  * receive buffers used by the upper layer. This is not the optimal code path.
1750*4882a593Smuzhiyun  * A better way is to not have the upper layer allocate its receive buffers but
1751*4882a593Smuzhiyun  * rather borrow the buffer from the reassembly queue, and return it after the
1752*4882a593Smuzhiyun  * data is consumed. But this would require more changes to the upper layer code,
1753*4882a593Smuzhiyun  * and also needs to consider packet boundaries while they are still being reassembled.
1754*4882a593Smuzhiyun  */
1755*4882a593Smuzhiyun static int smbd_recv_buf(struct smbd_connection *info, char *buf,
1756*4882a593Smuzhiyun 		unsigned int size)
1757*4882a593Smuzhiyun {
1758*4882a593Smuzhiyun 	struct smbd_response *response;
1759*4882a593Smuzhiyun 	struct smbd_data_transfer *data_transfer;
1760*4882a593Smuzhiyun 	int to_copy, to_read, data_read, offset;
1761*4882a593Smuzhiyun 	u32 data_length, remaining_data_length, data_offset;
1762*4882a593Smuzhiyun 	int rc;
1763*4882a593Smuzhiyun 
1764*4882a593Smuzhiyun again:
1765*4882a593Smuzhiyun 	/*
1766*4882a593Smuzhiyun 	 * No need to hold the reassembly queue lock all the time as we are
1767*4882a593Smuzhiyun 	 * the only one reading from the front of the queue. The transport
1768*4882a593Smuzhiyun 	 * may add more entries to the back of the queue at the same time
1769*4882a593Smuzhiyun 	 */
1770*4882a593Smuzhiyun 	log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
1771*4882a593Smuzhiyun 		info->reassembly_data_length);
1772*4882a593Smuzhiyun 	if (info->reassembly_data_length >= size) {
1773*4882a593Smuzhiyun 		int queue_length;
1774*4882a593Smuzhiyun 		int queue_removed = 0;
1775*4882a593Smuzhiyun 
1776*4882a593Smuzhiyun 		/*
1777*4882a593Smuzhiyun 		 * Need to make sure reassembly_data_length is read before
1778*4882a593Smuzhiyun 		 * reading reassembly_queue_length and calling
1779*4882a593Smuzhiyun 		 * _get_first_reassembly. This call is lock free
1780*4882a593Smuzhiyun 		 * as we never read the end of the queue, which is being
1781*4882a593Smuzhiyun 		 * updated in SOFTIRQ context as more data is received
1782*4882a593Smuzhiyun 		 */
1783*4882a593Smuzhiyun 		virt_rmb();
1784*4882a593Smuzhiyun 		queue_length = info->reassembly_queue_length;
1785*4882a593Smuzhiyun 		data_read = 0;
1786*4882a593Smuzhiyun 		to_read = size;
1787*4882a593Smuzhiyun 		offset = info->first_entry_offset;
1788*4882a593Smuzhiyun 		while (data_read < size) {
1789*4882a593Smuzhiyun 			response = _get_first_reassembly(info);
1790*4882a593Smuzhiyun 			data_transfer = smbd_response_payload(response);
1791*4882a593Smuzhiyun 			data_length = le32_to_cpu(data_transfer->data_length);
1792*4882a593Smuzhiyun 			remaining_data_length =
1793*4882a593Smuzhiyun 				le32_to_cpu(
1794*4882a593Smuzhiyun 					data_transfer->remaining_data_length);
1795*4882a593Smuzhiyun 			data_offset = le32_to_cpu(data_transfer->data_offset);
1796*4882a593Smuzhiyun 
1797*4882a593Smuzhiyun 			/*
1798*4882a593Smuzhiyun 			 * The upper layer expects RFC1002 length at the
1799*4882a593Smuzhiyun 			 * beginning of the payload. Return it to indicate
1800*4882a593Smuzhiyun 			 * the total length of the packet. This minimizes the
1801*4882a593Smuzhiyun 			 * change to upper layer packet processing logic. This
1802*4882a593Smuzhiyun 			 * will eventually be removed when an intermediate
1803*4882a593Smuzhiyun 			 * transport layer is added
1804*4882a593Smuzhiyun 			 */
1805*4882a593Smuzhiyun 			if (response->first_segment && size == 4) {
1806*4882a593Smuzhiyun 				unsigned int rfc1002_len =
1807*4882a593Smuzhiyun 					data_length + remaining_data_length;
1808*4882a593Smuzhiyun 				*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
1809*4882a593Smuzhiyun 				data_read = 4;
1810*4882a593Smuzhiyun 				response->first_segment = false;
1811*4882a593Smuzhiyun 				log_read(INFO, "returning rfc1002 length %d\n",
1812*4882a593Smuzhiyun 					rfc1002_len);
1813*4882a593Smuzhiyun 				goto read_rfc1002_done;
1814*4882a593Smuzhiyun 			}
1815*4882a593Smuzhiyun 
1816*4882a593Smuzhiyun 			to_copy = min_t(int, data_length - offset, to_read);
1817*4882a593Smuzhiyun 			memcpy(
1818*4882a593Smuzhiyun 				buf + data_read,
1819*4882a593Smuzhiyun 				(char *)data_transfer + data_offset + offset,
1820*4882a593Smuzhiyun 				to_copy);
1821*4882a593Smuzhiyun 
1822*4882a593Smuzhiyun 			/* move on to the next buffer? */
1823*4882a593Smuzhiyun 			if (to_copy == data_length - offset) {
1824*4882a593Smuzhiyun 				queue_length--;
1825*4882a593Smuzhiyun 				/*
1826*4882a593Smuzhiyun 				 * No need to lock if we are not at the
1827*4882a593Smuzhiyun 				 * end of the queue
1828*4882a593Smuzhiyun 				 */
1829*4882a593Smuzhiyun 				if (queue_length)
1830*4882a593Smuzhiyun 					list_del(&response->list);
1831*4882a593Smuzhiyun 				else {
1832*4882a593Smuzhiyun 					spin_lock_irq(
1833*4882a593Smuzhiyun 						&info->reassembly_queue_lock);
1834*4882a593Smuzhiyun 					list_del(&response->list);
1835*4882a593Smuzhiyun 					spin_unlock_irq(
1836*4882a593Smuzhiyun 						&info->reassembly_queue_lock);
1837*4882a593Smuzhiyun 				}
1838*4882a593Smuzhiyun 				queue_removed++;
1839*4882a593Smuzhiyun 				info->count_reassembly_queue--;
1840*4882a593Smuzhiyun 				info->count_dequeue_reassembly_queue++;
1841*4882a593Smuzhiyun 				put_receive_buffer(info, response);
1842*4882a593Smuzhiyun 				offset = 0;
1843*4882a593Smuzhiyun 				log_read(INFO, "put_receive_buffer offset=0\n");
1844*4882a593Smuzhiyun 			} else
1845*4882a593Smuzhiyun 				offset += to_copy;
1846*4882a593Smuzhiyun 
1847*4882a593Smuzhiyun 			to_read -= to_copy;
1848*4882a593Smuzhiyun 			data_read += to_copy;
1849*4882a593Smuzhiyun 
1850*4882a593Smuzhiyun 			log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n",
1851*4882a593Smuzhiyun 				 to_copy, data_length - offset,
1852*4882a593Smuzhiyun 				 to_read, data_read, offset);
1853*4882a593Smuzhiyun 		}
1854*4882a593Smuzhiyun 
1855*4882a593Smuzhiyun 		spin_lock_irq(&info->reassembly_queue_lock);
1856*4882a593Smuzhiyun 		info->reassembly_data_length -= data_read;
1857*4882a593Smuzhiyun 		info->reassembly_queue_length -= queue_removed;
1858*4882a593Smuzhiyun 		spin_unlock_irq(&info->reassembly_queue_lock);
1859*4882a593Smuzhiyun 
1860*4882a593Smuzhiyun 		info->first_entry_offset = offset;
1861*4882a593Smuzhiyun 		log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
1862*4882a593Smuzhiyun 			 data_read, info->reassembly_data_length,
1863*4882a593Smuzhiyun 			 info->first_entry_offset);
1864*4882a593Smuzhiyun read_rfc1002_done:
1865*4882a593Smuzhiyun 		return data_read;
1866*4882a593Smuzhiyun 	}
1867*4882a593Smuzhiyun 
1868*4882a593Smuzhiyun 	log_read(INFO, "wait_event on more data\n");
1869*4882a593Smuzhiyun 	rc = wait_event_interruptible(
1870*4882a593Smuzhiyun 		info->wait_reassembly_queue,
1871*4882a593Smuzhiyun 		info->reassembly_data_length >= size ||
1872*4882a593Smuzhiyun 			info->transport_status != SMBD_CONNECTED);
1873*4882a593Smuzhiyun 	/* Don't return any data if interrupted */
1874*4882a593Smuzhiyun 	if (rc)
1875*4882a593Smuzhiyun 		return rc;
1876*4882a593Smuzhiyun 
1877*4882a593Smuzhiyun 	if (info->transport_status != SMBD_CONNECTED) {
1878*4882a593Smuzhiyun 		log_read(ERR, "disconnected\n");
1879*4882a593Smuzhiyun 		return -ECONNABORTED;
1880*4882a593Smuzhiyun 	}
1881*4882a593Smuzhiyun 
1882*4882a593Smuzhiyun 	goto again;
1883*4882a593Smuzhiyun }
1884*4882a593Smuzhiyun 
1885*4882a593Smuzhiyun /*
1886*4882a593Smuzhiyun  * Receive a page from receive reassembly queue
1887*4882a593Smuzhiyun  * page: the page to read data into
1888*4882a593Smuzhiyun  * to_read: the length of data to read
1889*4882a593Smuzhiyun  * return value: actual data read
1890*4882a593Smuzhiyun  */
1891*4882a593Smuzhiyun static int smbd_recv_page(struct smbd_connection *info,
1892*4882a593Smuzhiyun 		struct page *page, unsigned int page_offset,
1893*4882a593Smuzhiyun 		unsigned int to_read)
1894*4882a593Smuzhiyun {
1895*4882a593Smuzhiyun 	int ret;
1896*4882a593Smuzhiyun 	char *to_address;
1897*4882a593Smuzhiyun 	void *page_address;
1898*4882a593Smuzhiyun 
1899*4882a593Smuzhiyun 	/* make sure we have the page ready for read */
1900*4882a593Smuzhiyun 	ret = wait_event_interruptible(
1901*4882a593Smuzhiyun 		info->wait_reassembly_queue,
1902*4882a593Smuzhiyun 		info->reassembly_data_length >= to_read ||
1903*4882a593Smuzhiyun 			info->transport_status != SMBD_CONNECTED);
1904*4882a593Smuzhiyun 	if (ret)
1905*4882a593Smuzhiyun 		return ret;
1906*4882a593Smuzhiyun 
1907*4882a593Smuzhiyun 	/* now we can read from reassembly queue and not sleep */
1908*4882a593Smuzhiyun 	page_address = kmap_atomic(page);
1909*4882a593Smuzhiyun 	to_address = (char *) page_address + page_offset;
1910*4882a593Smuzhiyun 
1911*4882a593Smuzhiyun 	log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
1912*4882a593Smuzhiyun 		page, to_address, to_read);
1913*4882a593Smuzhiyun 
1914*4882a593Smuzhiyun 	ret = smbd_recv_buf(info, to_address, to_read);
1915*4882a593Smuzhiyun 	kunmap_atomic(page_address);
1916*4882a593Smuzhiyun 
1917*4882a593Smuzhiyun 	return ret;
1918*4882a593Smuzhiyun }
1919*4882a593Smuzhiyun 
1920*4882a593Smuzhiyun /*
1921*4882a593Smuzhiyun  * Receive data from transport
1922*4882a593Smuzhiyun  * msg: a msghdr pointing to the buffer, can be ITER_KVEC or ITER_BVEC
1923*4882a593Smuzhiyun  * return: total bytes read, or 0. SMB Direct will not do partial read.
1924*4882a593Smuzhiyun  */
1925*4882a593Smuzhiyun int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
1926*4882a593Smuzhiyun {
1927*4882a593Smuzhiyun 	char *buf;
1928*4882a593Smuzhiyun 	struct page *page;
1929*4882a593Smuzhiyun 	unsigned int to_read, page_offset;
1930*4882a593Smuzhiyun 	int rc;
1931*4882a593Smuzhiyun 
1932*4882a593Smuzhiyun 	if (iov_iter_rw(&msg->msg_iter) == WRITE) {
1933*4882a593Smuzhiyun 		/* It's a bug in the upper layer to get here */
1934*4882a593Smuzhiyun 		cifs_dbg(VFS, "Invalid msg iter dir %u\n",
1935*4882a593Smuzhiyun 			 iov_iter_rw(&msg->msg_iter));
1936*4882a593Smuzhiyun 		rc = -EINVAL;
1937*4882a593Smuzhiyun 		goto out;
1938*4882a593Smuzhiyun 	}
1939*4882a593Smuzhiyun 
1940*4882a593Smuzhiyun 	switch (iov_iter_type(&msg->msg_iter)) {
1941*4882a593Smuzhiyun 	case ITER_KVEC:
1942*4882a593Smuzhiyun 		buf = msg->msg_iter.kvec->iov_base;
1943*4882a593Smuzhiyun 		to_read = msg->msg_iter.kvec->iov_len;
1944*4882a593Smuzhiyun 		rc = smbd_recv_buf(info, buf, to_read);
1945*4882a593Smuzhiyun 		break;
1946*4882a593Smuzhiyun 
1947*4882a593Smuzhiyun 	case ITER_BVEC:
1948*4882a593Smuzhiyun 		page = msg->msg_iter.bvec->bv_page;
1949*4882a593Smuzhiyun 		page_offset = msg->msg_iter.bvec->bv_offset;
1950*4882a593Smuzhiyun 		to_read = msg->msg_iter.bvec->bv_len;
1951*4882a593Smuzhiyun 		rc = smbd_recv_page(info, page, page_offset, to_read);
1952*4882a593Smuzhiyun 		break;
1953*4882a593Smuzhiyun 
1954*4882a593Smuzhiyun 	default:
1955*4882a593Smuzhiyun 		/* It's a bug in the upper layer to get here */
1956*4882a593Smuzhiyun 		cifs_dbg(VFS, "Invalid msg type %d\n",
1957*4882a593Smuzhiyun 			 iov_iter_type(&msg->msg_iter));
1958*4882a593Smuzhiyun 		rc = -EINVAL;
1959*4882a593Smuzhiyun 	}
1960*4882a593Smuzhiyun 
1961*4882a593Smuzhiyun out:
1962*4882a593Smuzhiyun 	/* SMBDirect will read it all or nothing */
1963*4882a593Smuzhiyun 	if (rc > 0)
1964*4882a593Smuzhiyun 		msg->msg_iter.count = 0;
1965*4882a593Smuzhiyun 	return rc;
1966*4882a593Smuzhiyun }
1967*4882a593Smuzhiyun 
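/*
 * Illustrative sketch (not part of this file): building a msghdr backed by an
 * ITER_KVEC iterator and reading exactly 'len' bytes through smbd_recv(). The
 * helper name smbd_recv_into() is hypothetical; iov_iter_kvec() is the
 * standard kernel helper, assuming the signature used by this kernel version.
 */
#if 0	/* example sketch, not compiled */
static int smbd_recv_into(struct smbd_connection *info, void *buf, size_t len)
{
	struct kvec kv = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = { };

	iov_iter_kvec(&msg.msg_iter, READ, &kv, 1, len);
	return smbd_recv(info, &msg);	/* reads all of 'len' or nothing */
}
#endif
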
1968*4882a593Smuzhiyun /*
1969*4882a593Smuzhiyun  * Send data to transport
1970*4882a593Smuzhiyun  * Each rqst is transported as a SMBDirect payload
1971*4882a593Smuzhiyun  * rqst: the data to write
1972*4882a593Smuzhiyun  * return value: 0 on successful write, otherwise error code
1973*4882a593Smuzhiyun  */
1974*4882a593Smuzhiyun int smbd_send(struct TCP_Server_Info *server,
1975*4882a593Smuzhiyun 	int num_rqst, struct smb_rqst *rqst_array)
1976*4882a593Smuzhiyun {
1977*4882a593Smuzhiyun 	struct smbd_connection *info = server->smbd_conn;
1978*4882a593Smuzhiyun 	struct kvec vec;
1979*4882a593Smuzhiyun 	int nvecs;
1980*4882a593Smuzhiyun 	int size;
1981*4882a593Smuzhiyun 	unsigned int buflen, remaining_data_length;
1982*4882a593Smuzhiyun 	int start, i, j;
1983*4882a593Smuzhiyun 	int max_iov_size =
1984*4882a593Smuzhiyun 		info->max_send_size - sizeof(struct smbd_data_transfer);
1985*4882a593Smuzhiyun 	struct kvec *iov;
1986*4882a593Smuzhiyun 	int rc;
1987*4882a593Smuzhiyun 	struct smb_rqst *rqst;
1988*4882a593Smuzhiyun 	int rqst_idx;
1989*4882a593Smuzhiyun 
1990*4882a593Smuzhiyun 	if (info->transport_status != SMBD_CONNECTED) {
1991*4882a593Smuzhiyun 		rc = -EAGAIN;
1992*4882a593Smuzhiyun 		goto done;
1993*4882a593Smuzhiyun 	}
1994*4882a593Smuzhiyun 
1995*4882a593Smuzhiyun 	/*
1996*4882a593Smuzhiyun 	 * Add in the page array if there is one. The caller needs to set
1997*4882a593Smuzhiyun 	 * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
1998*4882a593Smuzhiyun 	 * ends at a page boundary
1999*4882a593Smuzhiyun 	 */
2000*4882a593Smuzhiyun 	remaining_data_length = 0;
2001*4882a593Smuzhiyun 	for (i = 0; i < num_rqst; i++)
2002*4882a593Smuzhiyun 		remaining_data_length += smb_rqst_len(server, &rqst_array[i]);
2003*4882a593Smuzhiyun 
2004*4882a593Smuzhiyun 	if (remaining_data_length > info->max_fragmented_send_size) {
2005*4882a593Smuzhiyun 		log_write(ERR, "payload size %d > max size %d\n",
2006*4882a593Smuzhiyun 			remaining_data_length, info->max_fragmented_send_size);
2007*4882a593Smuzhiyun 		rc = -EINVAL;
2008*4882a593Smuzhiyun 		goto done;
2009*4882a593Smuzhiyun 	}
2010*4882a593Smuzhiyun 
2011*4882a593Smuzhiyun 	log_write(INFO, "num_rqst=%d total length=%u\n",
2012*4882a593Smuzhiyun 			num_rqst, remaining_data_length);
2013*4882a593Smuzhiyun 
2014*4882a593Smuzhiyun 	rqst_idx = 0;
2015*4882a593Smuzhiyun next_rqst:
2016*4882a593Smuzhiyun 	rqst = &rqst_array[rqst_idx];
2017*4882a593Smuzhiyun 	iov = rqst->rq_iov;
2018*4882a593Smuzhiyun 
2019*4882a593Smuzhiyun 	cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n",
2020*4882a593Smuzhiyun 		rqst_idx, smb_rqst_len(server, rqst));
2021*4882a593Smuzhiyun 	for (i = 0; i < rqst->rq_nvec; i++)
2022*4882a593Smuzhiyun 		dump_smb(iov[i].iov_base, iov[i].iov_len);
2023*4882a593Smuzhiyun 
2024*4882a593Smuzhiyun 
2025*4882a593Smuzhiyun 	log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d rq_tailsz=%d buflen=%lu\n",
2026*4882a593Smuzhiyun 		  rqst_idx, rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
2027*4882a593Smuzhiyun 		  rqst->rq_tailsz, smb_rqst_len(server, rqst));
2028*4882a593Smuzhiyun 
2029*4882a593Smuzhiyun 	start = i = 0;
2030*4882a593Smuzhiyun 	buflen = 0;
2031*4882a593Smuzhiyun 	while (true) {
2032*4882a593Smuzhiyun 		buflen += iov[i].iov_len;
2033*4882a593Smuzhiyun 		if (buflen > max_iov_size) {
2034*4882a593Smuzhiyun 			if (i > start) {
2035*4882a593Smuzhiyun 				remaining_data_length -=
2036*4882a593Smuzhiyun 					(buflen-iov[i].iov_len);
2037*4882a593Smuzhiyun 				log_write(INFO, "sending iov[] from start=%d i=%d nvecs=%d remaining_data_length=%d\n",
2038*4882a593Smuzhiyun 					  start, i, i - start,
2039*4882a593Smuzhiyun 					  remaining_data_length);
2040*4882a593Smuzhiyun 				rc = smbd_post_send_data(
2041*4882a593Smuzhiyun 					info, &iov[start], i-start,
2042*4882a593Smuzhiyun 					remaining_data_length);
2043*4882a593Smuzhiyun 				if (rc)
2044*4882a593Smuzhiyun 					goto done;
2045*4882a593Smuzhiyun 			} else {
2046*4882a593Smuzhiyun 				/* iov[start] is too big, break it */
2047*4882a593Smuzhiyun 				nvecs = (buflen+max_iov_size-1)/max_iov_size;
2048*4882a593Smuzhiyun 				log_write(INFO, "iov[%d] iov_base=%p buflen=%d break to %d vectors\n",
2049*4882a593Smuzhiyun 					  start, iov[start].iov_base,
2050*4882a593Smuzhiyun 					  buflen, nvecs);
2051*4882a593Smuzhiyun 				for (j = 0; j < nvecs; j++) {
2052*4882a593Smuzhiyun 					vec.iov_base =
2053*4882a593Smuzhiyun 						(char *)iov[start].iov_base +
2054*4882a593Smuzhiyun 						j*max_iov_size;
2055*4882a593Smuzhiyun 					vec.iov_len = max_iov_size;
2056*4882a593Smuzhiyun 					if (j == nvecs-1)
2057*4882a593Smuzhiyun 						vec.iov_len =
2058*4882a593Smuzhiyun 							buflen -
2059*4882a593Smuzhiyun 							max_iov_size*(nvecs-1);
2060*4882a593Smuzhiyun 					remaining_data_length -= vec.iov_len;
2061*4882a593Smuzhiyun 					log_write(INFO,
2062*4882a593Smuzhiyun 						"sending vec j=%d iov_base=%p iov_len=%zu remaining_data_length=%d\n",
2063*4882a593Smuzhiyun 						  j, vec.iov_base, vec.iov_len,
2064*4882a593Smuzhiyun 						  remaining_data_length);
2065*4882a593Smuzhiyun 					rc = smbd_post_send_data(
2066*4882a593Smuzhiyun 						info, &vec, 1,
2067*4882a593Smuzhiyun 						remaining_data_length);
2068*4882a593Smuzhiyun 					if (rc)
2069*4882a593Smuzhiyun 						goto done;
2070*4882a593Smuzhiyun 				}
2071*4882a593Smuzhiyun 				i++;
2072*4882a593Smuzhiyun 				if (i == rqst->rq_nvec)
2073*4882a593Smuzhiyun 					break;
2074*4882a593Smuzhiyun 			}
2075*4882a593Smuzhiyun 			start = i;
2076*4882a593Smuzhiyun 			buflen = 0;
2077*4882a593Smuzhiyun 		} else {
2078*4882a593Smuzhiyun 			i++;
2079*4882a593Smuzhiyun 			if (i == rqst->rq_nvec) {
2080*4882a593Smuzhiyun 				/* send out all remaining vecs */
2081*4882a593Smuzhiyun 				remaining_data_length -= buflen;
2082*4882a593Smuzhiyun 				log_write(INFO, "sending iov[] from start=%d i=%d nvecs=%d remaining_data_length=%d\n",
2083*4882a593Smuzhiyun 					  start, i, i - start,
2084*4882a593Smuzhiyun 					  remaining_data_length);
2085*4882a593Smuzhiyun 				rc = smbd_post_send_data(info, &iov[start],
2086*4882a593Smuzhiyun 					i-start, remaining_data_length);
2087*4882a593Smuzhiyun 				if (rc)
2088*4882a593Smuzhiyun 					goto done;
2089*4882a593Smuzhiyun 				break;
2090*4882a593Smuzhiyun 			}
2091*4882a593Smuzhiyun 		}
2092*4882a593Smuzhiyun 		log_write(INFO, "looping i=%d buflen=%d\n", i, buflen);
2093*4882a593Smuzhiyun 	}
2094*4882a593Smuzhiyun 
2095*4882a593Smuzhiyun 	/* now sending pages if there are any */
2096*4882a593Smuzhiyun 	for (i = 0; i < rqst->rq_npages; i++) {
2097*4882a593Smuzhiyun 		unsigned int offset;
2098*4882a593Smuzhiyun 
2099*4882a593Smuzhiyun 		rqst_page_get_length(rqst, i, &buflen, &offset);
2100*4882a593Smuzhiyun 		nvecs = (buflen + max_iov_size - 1) / max_iov_size;
2101*4882a593Smuzhiyun 		log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
2102*4882a593Smuzhiyun 			buflen, nvecs);
2103*4882a593Smuzhiyun 		for (j = 0; j < nvecs; j++) {
2104*4882a593Smuzhiyun 			size = max_iov_size;
2105*4882a593Smuzhiyun 			if (j == nvecs-1)
2106*4882a593Smuzhiyun 				size = buflen - j*max_iov_size;
2107*4882a593Smuzhiyun 			remaining_data_length -= size;
2108*4882a593Smuzhiyun 			log_write(INFO, "sending pages i=%d offset=%d size=%d remaining_data_length=%d\n",
2109*4882a593Smuzhiyun 				  i, j * max_iov_size + offset, size,
2110*4882a593Smuzhiyun 				  remaining_data_length);
2111*4882a593Smuzhiyun 			rc = smbd_post_send_page(
2112*4882a593Smuzhiyun 				info, rqst->rq_pages[i],
2113*4882a593Smuzhiyun 				j*max_iov_size + offset,
2114*4882a593Smuzhiyun 				size, remaining_data_length);
2115*4882a593Smuzhiyun 			if (rc)
2116*4882a593Smuzhiyun 				goto done;
2117*4882a593Smuzhiyun 		}
2118*4882a593Smuzhiyun 	}
2119*4882a593Smuzhiyun 
2120*4882a593Smuzhiyun 	rqst_idx++;
2121*4882a593Smuzhiyun 	if (rqst_idx < num_rqst)
2122*4882a593Smuzhiyun 		goto next_rqst;
2123*4882a593Smuzhiyun 
2124*4882a593Smuzhiyun done:
2125*4882a593Smuzhiyun 	/*
2126*4882a593Smuzhiyun 	 * As an optimization, we don't wait for individual I/O to finish
2127*4882a593Smuzhiyun 	 * before sending the next one.
2128*4882a593Smuzhiyun 	 * Send them all and wait for the pending send count to get to 0,
2129*4882a593Smuzhiyun 	 * which means all the I/Os have been posted and we are good to return
2130*4882a593Smuzhiyun 	 */
2131*4882a593Smuzhiyun 
2132*4882a593Smuzhiyun 	wait_event(info->wait_send_pending,
2133*4882a593Smuzhiyun 		atomic_read(&info->send_pending) == 0);
2134*4882a593Smuzhiyun 
2135*4882a593Smuzhiyun 	return rc;
2136*4882a593Smuzhiyun }
2137*4882a593Smuzhiyun 
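/*
 * Illustrative sketch (not part of this file): the fragmentation arithmetic
 * used above when a single iov or page chunk exceeds max_iov_size. The last
 * fragment carries the remainder. Function and variable names are assumptions
 * for this example only.
 */
#if 0	/* example sketch, not compiled */
static void show_fragmentation(unsigned int buflen, unsigned int max_iov_size)
{
	int nvecs = (buflen + max_iov_size - 1) / max_iov_size;
	int j;

	for (j = 0; j < nvecs; j++) {
		unsigned int size = max_iov_size;

		if (j == nvecs - 1)
			size = buflen - j * max_iov_size;
		/* fragment j covers [j * max_iov_size, j * max_iov_size + size) */
	}
}
#endif
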
2138*4882a593Smuzhiyun static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc)
2139*4882a593Smuzhiyun {
2140*4882a593Smuzhiyun 	struct smbd_mr *mr;
2141*4882a593Smuzhiyun 	struct ib_cqe *cqe;
2142*4882a593Smuzhiyun 
2143*4882a593Smuzhiyun 	if (wc->status) {
2144*4882a593Smuzhiyun 		log_rdma_mr(ERR, "status=%d\n", wc->status);
2145*4882a593Smuzhiyun 		cqe = wc->wr_cqe;
2146*4882a593Smuzhiyun 		mr = container_of(cqe, struct smbd_mr, cqe);
2147*4882a593Smuzhiyun 		smbd_disconnect_rdma_connection(mr->conn);
2148*4882a593Smuzhiyun 	}
2149*4882a593Smuzhiyun }
2150*4882a593Smuzhiyun 
2151*4882a593Smuzhiyun /*
2152*4882a593Smuzhiyun  * The work queue function that recovers MRs
2153*4882a593Smuzhiyun  * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used
2154*4882a593Smuzhiyun  * again. Both calls are slow, so finish them in a workqueue so they do not
2155*4882a593Smuzhiyun  * block the I/O path.
2156*4882a593Smuzhiyun  * There is one workqueue that recovers MRs; there is no need to lock as the
2157*4882a593Smuzhiyun  * I/O requests calling smbd_register_mr will never update the links in the
2158*4882a593Smuzhiyun  * mr_list.
2159*4882a593Smuzhiyun  */
2160*4882a593Smuzhiyun static void smbd_mr_recovery_work(struct work_struct *work)
2161*4882a593Smuzhiyun {
2162*4882a593Smuzhiyun 	struct smbd_connection *info =
2163*4882a593Smuzhiyun 		container_of(work, struct smbd_connection, mr_recovery_work);
2164*4882a593Smuzhiyun 	struct smbd_mr *smbdirect_mr;
2165*4882a593Smuzhiyun 	int rc;
2166*4882a593Smuzhiyun 
2167*4882a593Smuzhiyun 	list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
2168*4882a593Smuzhiyun 		if (smbdirect_mr->state == MR_ERROR) {
2169*4882a593Smuzhiyun 
2170*4882a593Smuzhiyun 			/* recover this MR entry */
2171*4882a593Smuzhiyun 			rc = ib_dereg_mr(smbdirect_mr->mr);
2172*4882a593Smuzhiyun 			if (rc) {
2173*4882a593Smuzhiyun 				log_rdma_mr(ERR,
2174*4882a593Smuzhiyun 					"ib_dereg_mr failed rc=%x\n",
2175*4882a593Smuzhiyun 					rc);
2176*4882a593Smuzhiyun 				smbd_disconnect_rdma_connection(info);
2177*4882a593Smuzhiyun 				continue;
2178*4882a593Smuzhiyun 			}
2179*4882a593Smuzhiyun 
2180*4882a593Smuzhiyun 			smbdirect_mr->mr = ib_alloc_mr(
2181*4882a593Smuzhiyun 				info->pd, info->mr_type,
2182*4882a593Smuzhiyun 				info->max_frmr_depth);
2183*4882a593Smuzhiyun 			if (IS_ERR(smbdirect_mr->mr)) {
2184*4882a593Smuzhiyun 				log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
2185*4882a593Smuzhiyun 					    info->mr_type,
2186*4882a593Smuzhiyun 					    info->max_frmr_depth);
2187*4882a593Smuzhiyun 				smbd_disconnect_rdma_connection(info);
2188*4882a593Smuzhiyun 				continue;
2189*4882a593Smuzhiyun 			}
2190*4882a593Smuzhiyun 		} else
2191*4882a593Smuzhiyun 			/* This MR is being used, don't recover it */
2192*4882a593Smuzhiyun 			continue;
2193*4882a593Smuzhiyun 
2194*4882a593Smuzhiyun 		smbdirect_mr->state = MR_READY;
2195*4882a593Smuzhiyun 
2196*4882a593Smuzhiyun 		/* smbdirect_mr->state is updated by this function
2197*4882a593Smuzhiyun 		 * and is read and updated by the I/O issuing CPUs trying
2198*4882a593Smuzhiyun 		 * to get an MR. The call to atomic_inc_return
2199*4882a593Smuzhiyun 		 * implies a full memory barrier and guarantees this
2200*4882a593Smuzhiyun 		 * value is updated before waking up any calls to
2201*4882a593Smuzhiyun 		 * get_mr() from the I/O issuing CPUs.
2202*4882a593Smuzhiyun 		 */
2203*4882a593Smuzhiyun 		if (atomic_inc_return(&info->mr_ready_count) == 1)
2204*4882a593Smuzhiyun 			wake_up_interruptible(&info->wait_mr);
2205*4882a593Smuzhiyun 	}
2206*4882a593Smuzhiyun }
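/*
 * Hedged sketch, not part of the original revision: the recovery work above
 * is the slow half of a "mark in the fast path, repair in a workqueue"
 * split.  The fragment below shows how the fast-path half might look; it
 * reuses fields that exist in struct smbd_connection and struct smbd_mr,
 * but the function name is hypothetical.
 */
#if 0	/* illustration only, never compiled */
static void example_mark_mr_failed(struct smbd_connection *info,
				   struct smbd_mr *mr)
{
	/* cheap state change on the I/O path ... */
	mr->state = MR_ERROR;
	/* ... the slow ib_dereg_mr()/ib_alloc_mr() happen in the worker */
	queue_work(info->workqueue, &info->mr_recovery_work);
}
#endif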
2207*4882a593Smuzhiyun 
2208*4882a593Smuzhiyun static void destroy_mr_list(struct smbd_connection *info)
2209*4882a593Smuzhiyun {
2210*4882a593Smuzhiyun 	struct smbd_mr *mr, *tmp;
2211*4882a593Smuzhiyun 
2212*4882a593Smuzhiyun 	cancel_work_sync(&info->mr_recovery_work);
2213*4882a593Smuzhiyun 	list_for_each_entry_safe(mr, tmp, &info->mr_list, list) {
2214*4882a593Smuzhiyun 		if (mr->state == MR_INVALIDATED)
2215*4882a593Smuzhiyun 			ib_dma_unmap_sg(info->id->device, mr->sgl,
2216*4882a593Smuzhiyun 				mr->sgl_count, mr->dir);
2217*4882a593Smuzhiyun 		ib_dereg_mr(mr->mr);
2218*4882a593Smuzhiyun 		kfree(mr->sgl);
2219*4882a593Smuzhiyun 		kfree(mr);
2220*4882a593Smuzhiyun 	}
2221*4882a593Smuzhiyun }
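/*
 * Hedged sketch, not part of the original revision: destroy_mr_list() above
 * follows the usual teardown order of cancelling the worker before freeing
 * anything that worker might touch.  A generic rendering of that ordering,
 * with hypothetical names, looks like this:
 */
#if 0	/* illustration only, never compiled */
struct example_ctx {
	struct work_struct recovery_work;
	void *resources;
};

static void example_teardown(struct example_ctx *ctx)
{
	/* 1. make sure the deferred work is not running and cannot rearm ... */
	cancel_work_sync(&ctx->recovery_work);

	/* 2. ... only then is it safe to free what the work would dereference */
	kfree(ctx->resources);
}
#endif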
2222*4882a593Smuzhiyun 
2223*4882a593Smuzhiyun /*
2224*4882a593Smuzhiyun  * Allocate MRs used for RDMA read/write.
2225*4882a593Smuzhiyun  * The number of MRs will not exceed the hardware capability in responder_resources.
2226*4882a593Smuzhiyun  * All MRs are kept in mr_list. An MR can be recovered after it is used;
2227*4882a593Smuzhiyun  * recovery is done in smbd_mr_recovery_work. The content of a list entry
2228*4882a593Smuzhiyun  * changes as MRs are used and recovered for I/O, but the list links will not change.
2229*4882a593Smuzhiyun  */
2230*4882a593Smuzhiyun static int allocate_mr_list(struct smbd_connection *info)
2231*4882a593Smuzhiyun {
2232*4882a593Smuzhiyun 	int i;
2233*4882a593Smuzhiyun 	struct smbd_mr *smbdirect_mr, *tmp;
2234*4882a593Smuzhiyun 
2235*4882a593Smuzhiyun 	INIT_LIST_HEAD(&info->mr_list);
2236*4882a593Smuzhiyun 	init_waitqueue_head(&info->wait_mr);
2237*4882a593Smuzhiyun 	spin_lock_init(&info->mr_list_lock);
2238*4882a593Smuzhiyun 	atomic_set(&info->mr_ready_count, 0);
2239*4882a593Smuzhiyun 	atomic_set(&info->mr_used_count, 0);
2240*4882a593Smuzhiyun 	init_waitqueue_head(&info->wait_for_mr_cleanup);
2241*4882a593Smuzhiyun 	/* Allocate more MRs (2x) than hardware responder_resources */
2242*4882a593Smuzhiyun 	for (i = 0; i < info->responder_resources * 2; i++) {
2243*4882a593Smuzhiyun 		smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
2244*4882a593Smuzhiyun 		if (!smbdirect_mr)
2245*4882a593Smuzhiyun 			goto out;
2246*4882a593Smuzhiyun 		smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type,
2247*4882a593Smuzhiyun 					info->max_frmr_depth);
2248*4882a593Smuzhiyun 		if (IS_ERR(smbdirect_mr->mr)) {
2249*4882a593Smuzhiyun 			log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
2250*4882a593Smuzhiyun 				    info->mr_type, info->max_frmr_depth);
2251*4882a593Smuzhiyun 			goto out;
2252*4882a593Smuzhiyun 		}
2253*4882a593Smuzhiyun 		smbdirect_mr->sgl = kcalloc(
2254*4882a593Smuzhiyun 					info->max_frmr_depth,
2255*4882a593Smuzhiyun 					sizeof(struct scatterlist),
2256*4882a593Smuzhiyun 					GFP_KERNEL);
2257*4882a593Smuzhiyun 		if (!smbdirect_mr->sgl) {
2258*4882a593Smuzhiyun 			log_rdma_mr(ERR, "failed to allocate sgl\n");
2259*4882a593Smuzhiyun 			ib_dereg_mr(smbdirect_mr->mr);
2260*4882a593Smuzhiyun 			goto out;
2261*4882a593Smuzhiyun 		}
2262*4882a593Smuzhiyun 		smbdirect_mr->state = MR_READY;
2263*4882a593Smuzhiyun 		smbdirect_mr->conn = info;
2264*4882a593Smuzhiyun 
2265*4882a593Smuzhiyun 		list_add_tail(&smbdirect_mr->list, &info->mr_list);
2266*4882a593Smuzhiyun 		atomic_inc(&info->mr_ready_count);
2267*4882a593Smuzhiyun 	}
2268*4882a593Smuzhiyun 	INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
2269*4882a593Smuzhiyun 	return 0;
2270*4882a593Smuzhiyun 
2271*4882a593Smuzhiyun out:
2272*4882a593Smuzhiyun 	kfree(smbdirect_mr);
2273*4882a593Smuzhiyun 
2274*4882a593Smuzhiyun 	list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
2275*4882a593Smuzhiyun 		ib_dereg_mr(smbdirect_mr->mr);
2276*4882a593Smuzhiyun 		kfree(smbdirect_mr->sgl);
2277*4882a593Smuzhiyun 		kfree(smbdirect_mr);
2278*4882a593Smuzhiyun 	}
2279*4882a593Smuzhiyun 	return -ENOMEM;
2280*4882a593Smuzhiyun }
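/*
 * Hedged sketch, not part of the original revision: allocate_mr_list() above
 * has the common "allocate N entries onto a list, unwind everything already
 * on the list if any step fails" shape.  Reduced to its core, with
 * hypothetical names (the caller is assumed to have run INIT_LIST_HEAD()
 * on the head):
 */
#if 0	/* illustration only, never compiled */
struct example_entry {
	struct list_head list;
};

static int example_alloc_n(struct list_head *head, int n)
{
	struct example_entry *e, *tmp;
	int i;

	for (i = 0; i < n; i++) {
		e = kzalloc(sizeof(*e), GFP_KERNEL);
		if (!e)
			goto unwind;
		list_add_tail(&e->list, head);
	}
	return 0;

unwind:
	/* free everything that made it onto the list before the failure */
	list_for_each_entry_safe(e, tmp, head, list) {
		list_del(&e->list);
		kfree(e);
	}
	return -ENOMEM;
}
#endif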
2281*4882a593Smuzhiyun 
2282*4882a593Smuzhiyun /*
2283*4882a593Smuzhiyun  * Get an MR from mr_list. This function waits until there is at least one
2284*4882a593Smuzhiyun  * MR available in the list. It may access the list while
2285*4882a593Smuzhiyun  * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock
2286*4882a593Smuzhiyun  * as they never modify the same places. However, several CPUs may be
2287*4882a593Smuzhiyun  * issuing I/O and trying to get an MR at the same time; mr_list_lock is
2288*4882a593Smuzhiyun  * used to protect against that.
2289*4882a593Smuzhiyun  */
2290*4882a593Smuzhiyun static struct smbd_mr *get_mr(struct smbd_connection *info)
2291*4882a593Smuzhiyun {
2292*4882a593Smuzhiyun 	struct smbd_mr *ret;
2293*4882a593Smuzhiyun 	int rc;
2294*4882a593Smuzhiyun again:
2295*4882a593Smuzhiyun 	rc = wait_event_interruptible(info->wait_mr,
2296*4882a593Smuzhiyun 		atomic_read(&info->mr_ready_count) ||
2297*4882a593Smuzhiyun 		info->transport_status != SMBD_CONNECTED);
2298*4882a593Smuzhiyun 	if (rc) {
2299*4882a593Smuzhiyun 		log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc);
2300*4882a593Smuzhiyun 		return NULL;
2301*4882a593Smuzhiyun 	}
2302*4882a593Smuzhiyun 
2303*4882a593Smuzhiyun 	if (info->transport_status != SMBD_CONNECTED) {
2304*4882a593Smuzhiyun 		log_rdma_mr(ERR, "info->transport_status=%x\n",
2305*4882a593Smuzhiyun 			info->transport_status);
2306*4882a593Smuzhiyun 		return NULL;
2307*4882a593Smuzhiyun 	}
2308*4882a593Smuzhiyun 
2309*4882a593Smuzhiyun 	spin_lock(&info->mr_list_lock);
2310*4882a593Smuzhiyun 	list_for_each_entry(ret, &info->mr_list, list) {
2311*4882a593Smuzhiyun 		if (ret->state == MR_READY) {
2312*4882a593Smuzhiyun 			ret->state = MR_REGISTERED;
2313*4882a593Smuzhiyun 			spin_unlock(&info->mr_list_lock);
2314*4882a593Smuzhiyun 			atomic_dec(&info->mr_ready_count);
2315*4882a593Smuzhiyun 			atomic_inc(&info->mr_used_count);
2316*4882a593Smuzhiyun 			return ret;
2317*4882a593Smuzhiyun 		}
2318*4882a593Smuzhiyun 	}
2319*4882a593Smuzhiyun 
2320*4882a593Smuzhiyun 	spin_unlock(&info->mr_list_lock);
2321*4882a593Smuzhiyun 	/*
2322*4882a593Smuzhiyun 	 * It is possible that we could fail to get an MR because other processes
2323*4882a593Smuzhiyun 	 * may try to acquire one at the same time. If this is the case, retry.
2324*4882a593Smuzhiyun 	 */
2325*4882a593Smuzhiyun 	goto again;
2326*4882a593Smuzhiyun }
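/*
 * Hedged sketch, not part of the original revision: get_mr() above is the
 * consumer half of a counted waitqueue.  The producer half (MR recovery and
 * deregistration) publishes MR_READY, bumps mr_ready_count and wakes
 * wait_mr; waiters only sleep while the count is 0, so only the 0 -> 1
 * transition needs a wake-up.  The pairing, with hypothetical function
 * names:
 */
#if 0	/* illustration only, never compiled */
static void example_producer(struct smbd_connection *info, struct smbd_mr *mr)
{
	mr->state = MR_READY;
	if (atomic_inc_return(&info->mr_ready_count) == 1)
		wake_up_interruptible(&info->wait_mr);
}

static int example_consumer_wait(struct smbd_connection *info)
{
	/* sleep until an MR might be available, or the transport went down */
	return wait_event_interruptible(info->wait_mr,
		atomic_read(&info->mr_ready_count) ||
		info->transport_status != SMBD_CONNECTED);
}
#endif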
2327*4882a593Smuzhiyun 
2328*4882a593Smuzhiyun /*
2329*4882a593Smuzhiyun  * Register memory for RDMA read/write
2330*4882a593Smuzhiyun  * pages[]: the list of pages to register memory with
2331*4882a593Smuzhiyun  * num_pages: the number of pages to register
2332*4882a593Smuzhiyun  * tailsz: if non-zero, the bytes to register in the last page
2333*4882a593Smuzhiyun  * writing: true if this is an RDMA write (SMB read), false for an RDMA read
2334*4882a593Smuzhiyun  * need_invalidate: true if this MR needs to be locally invalidated after I/O
2335*4882a593Smuzhiyun  * return value: the MR registered, NULL on failure.
2336*4882a593Smuzhiyun  */
2337*4882a593Smuzhiyun struct smbd_mr *smbd_register_mr(
2338*4882a593Smuzhiyun 	struct smbd_connection *info, struct page *pages[], int num_pages,
2339*4882a593Smuzhiyun 	int offset, int tailsz, bool writing, bool need_invalidate)
2340*4882a593Smuzhiyun {
2341*4882a593Smuzhiyun 	struct smbd_mr *smbdirect_mr;
2342*4882a593Smuzhiyun 	int rc, i;
2343*4882a593Smuzhiyun 	enum dma_data_direction dir;
2344*4882a593Smuzhiyun 	struct ib_reg_wr *reg_wr;
2345*4882a593Smuzhiyun 
2346*4882a593Smuzhiyun 	if (num_pages > info->max_frmr_depth) {
2347*4882a593Smuzhiyun 		log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n",
2348*4882a593Smuzhiyun 			num_pages, info->max_frmr_depth);
2349*4882a593Smuzhiyun 		return NULL;
2350*4882a593Smuzhiyun 	}
2351*4882a593Smuzhiyun 
2352*4882a593Smuzhiyun 	smbdirect_mr = get_mr(info);
2353*4882a593Smuzhiyun 	if (!smbdirect_mr) {
2354*4882a593Smuzhiyun 		log_rdma_mr(ERR, "get_mr returning NULL\n");
2355*4882a593Smuzhiyun 		return NULL;
2356*4882a593Smuzhiyun 	}
2357*4882a593Smuzhiyun 	smbdirect_mr->need_invalidate = need_invalidate;
2358*4882a593Smuzhiyun 	smbdirect_mr->sgl_count = num_pages;
2359*4882a593Smuzhiyun 	sg_init_table(smbdirect_mr->sgl, num_pages);
2360*4882a593Smuzhiyun 
2361*4882a593Smuzhiyun 	log_rdma_mr(INFO, "num_pages=0x%x offset=0x%x tailsz=0x%x\n",
2362*4882a593Smuzhiyun 			num_pages, offset, tailsz);
2363*4882a593Smuzhiyun 
2364*4882a593Smuzhiyun 	if (num_pages == 1) {
2365*4882a593Smuzhiyun 		sg_set_page(&smbdirect_mr->sgl[0], pages[0], tailsz, offset);
2366*4882a593Smuzhiyun 		goto skip_multiple_pages;
2367*4882a593Smuzhiyun 	}
2368*4882a593Smuzhiyun 
2369*4882a593Smuzhiyun 	/* We have at least two pages to register */
2370*4882a593Smuzhiyun 	sg_set_page(
2371*4882a593Smuzhiyun 		&smbdirect_mr->sgl[0], pages[0], PAGE_SIZE - offset, offset);
2372*4882a593Smuzhiyun 	i = 1;
2373*4882a593Smuzhiyun 	while (i < num_pages - 1) {
2374*4882a593Smuzhiyun 		sg_set_page(&smbdirect_mr->sgl[i], pages[i], PAGE_SIZE, 0);
2375*4882a593Smuzhiyun 		i++;
2376*4882a593Smuzhiyun 	}
2377*4882a593Smuzhiyun 	sg_set_page(&smbdirect_mr->sgl[i], pages[i],
2378*4882a593Smuzhiyun 		tailsz ? tailsz : PAGE_SIZE, 0);
2379*4882a593Smuzhiyun 
2380*4882a593Smuzhiyun skip_multiple_pages:
2381*4882a593Smuzhiyun 	dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
2382*4882a593Smuzhiyun 	smbdirect_mr->dir = dir;
2383*4882a593Smuzhiyun 	rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgl, num_pages, dir);
2384*4882a593Smuzhiyun 	if (!rc) {
2385*4882a593Smuzhiyun 		log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
2386*4882a593Smuzhiyun 			num_pages, dir, rc);
2387*4882a593Smuzhiyun 		goto dma_map_error;
2388*4882a593Smuzhiyun 	}
2389*4882a593Smuzhiyun 
2390*4882a593Smuzhiyun 	rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgl, num_pages,
2391*4882a593Smuzhiyun 		NULL, PAGE_SIZE);
2392*4882a593Smuzhiyun 	if (rc != num_pages) {
2393*4882a593Smuzhiyun 		log_rdma_mr(ERR,
2394*4882a593Smuzhiyun 			"ib_map_mr_sg failed rc = %d num_pages = %x\n",
2395*4882a593Smuzhiyun 			rc, num_pages);
2396*4882a593Smuzhiyun 		goto map_mr_error;
2397*4882a593Smuzhiyun 	}
2398*4882a593Smuzhiyun 
2399*4882a593Smuzhiyun 	ib_update_fast_reg_key(smbdirect_mr->mr,
2400*4882a593Smuzhiyun 		ib_inc_rkey(smbdirect_mr->mr->rkey));
2401*4882a593Smuzhiyun 	reg_wr = &smbdirect_mr->wr;
2402*4882a593Smuzhiyun 	reg_wr->wr.opcode = IB_WR_REG_MR;
2403*4882a593Smuzhiyun 	smbdirect_mr->cqe.done = register_mr_done;
2404*4882a593Smuzhiyun 	reg_wr->wr.wr_cqe = &smbdirect_mr->cqe;
2405*4882a593Smuzhiyun 	reg_wr->wr.num_sge = 0;
2406*4882a593Smuzhiyun 	reg_wr->wr.send_flags = IB_SEND_SIGNALED;
2407*4882a593Smuzhiyun 	reg_wr->mr = smbdirect_mr->mr;
2408*4882a593Smuzhiyun 	reg_wr->key = smbdirect_mr->mr->rkey;
2409*4882a593Smuzhiyun 	reg_wr->access = writing ?
2410*4882a593Smuzhiyun 			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
2411*4882a593Smuzhiyun 			IB_ACCESS_REMOTE_READ;
2412*4882a593Smuzhiyun 
2413*4882a593Smuzhiyun 	/*
2414*4882a593Smuzhiyun 	 * There is no need to wait for completion of the ib_post_send
2415*4882a593Smuzhiyun 	 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
2416*4882a593Smuzhiyun 	 * on the next ib_post_send when we actually send I/O to the remote peer.
2417*4882a593Smuzhiyun 	 */
2418*4882a593Smuzhiyun 	rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
2419*4882a593Smuzhiyun 	if (!rc)
2420*4882a593Smuzhiyun 		return smbdirect_mr;
2421*4882a593Smuzhiyun 
2422*4882a593Smuzhiyun 	log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
2423*4882a593Smuzhiyun 		rc, reg_wr->key);
2424*4882a593Smuzhiyun 
2425*4882a593Smuzhiyun 	/* If all failed, attempt to recover this MR by setting it to MR_ERROR */
2426*4882a593Smuzhiyun map_mr_error:
2427*4882a593Smuzhiyun 	ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgl,
2428*4882a593Smuzhiyun 		smbdirect_mr->sgl_count, smbdirect_mr->dir);
2429*4882a593Smuzhiyun 
2430*4882a593Smuzhiyun dma_map_error:
2431*4882a593Smuzhiyun 	smbdirect_mr->state = MR_ERROR;
2432*4882a593Smuzhiyun 	if (atomic_dec_and_test(&info->mr_used_count))
2433*4882a593Smuzhiyun 		wake_up(&info->wait_for_mr_cleanup);
2434*4882a593Smuzhiyun 
2435*4882a593Smuzhiyun 	smbd_disconnect_rdma_connection(info);
2436*4882a593Smuzhiyun 
2437*4882a593Smuzhiyun 	return NULL;
2438*4882a593Smuzhiyun }
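/*
 * Hedged sketch, not part of the original revision: smbd_register_mr() above
 * follows the standard fast-registration sequence for an FR MR.  Compressed
 * into one hypothetical helper that operates on an already filled
 * scatterlist, the order of operations is:
 */
#if 0	/* illustration only, never compiled */
static int example_fast_register(struct smbd_connection *info,
				 struct smbd_mr *mr, int nents)
{
	struct ib_reg_wr *reg_wr = &mr->wr;

	/* 1. DMA-map the pages backing the scatterlist */
	if (!ib_dma_map_sg(info->id->device, mr->sgl, nents, mr->dir))
		return -EIO;

	/* 2. bind the mapped pages to the MR */
	if (ib_map_mr_sg(mr->mr, mr->sgl, nents, NULL, PAGE_SIZE) != nents)
		return -EIO;

	/* 3. refresh the rkey and post IB_WR_REG_MR; there is no need to wait
	 * for its completion, the HCA orders it before the data transfer that
	 * follows on the same send queue
	 */
	ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
	mr->cqe.done = register_mr_done;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	reg_wr->wr.wr_cqe = &mr->cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = IB_SEND_SIGNALED;
	reg_wr->mr = mr->mr;
	reg_wr->key = mr->mr->rkey;
	/* for an RDMA write; use IB_ACCESS_REMOTE_READ for an RDMA read */
	reg_wr->access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
	return ib_post_send(info->id->qp, &reg_wr->wr, NULL);
}
#endif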
2439*4882a593Smuzhiyun 
2440*4882a593Smuzhiyun static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
2441*4882a593Smuzhiyun {
2442*4882a593Smuzhiyun 	struct smbd_mr *smbdirect_mr;
2443*4882a593Smuzhiyun 	struct ib_cqe *cqe;
2444*4882a593Smuzhiyun 
2445*4882a593Smuzhiyun 	cqe = wc->wr_cqe;
2446*4882a593Smuzhiyun 	smbdirect_mr = container_of(cqe, struct smbd_mr, cqe);
2447*4882a593Smuzhiyun 	smbdirect_mr->state = MR_INVALIDATED;
2448*4882a593Smuzhiyun 	if (wc->status != IB_WC_SUCCESS) {
2449*4882a593Smuzhiyun 		log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status);
2450*4882a593Smuzhiyun 		smbdirect_mr->state = MR_ERROR;
2451*4882a593Smuzhiyun 	}
2452*4882a593Smuzhiyun 	complete(&smbdirect_mr->invalidate_done);
2453*4882a593Smuzhiyun }
2454*4882a593Smuzhiyun 
2455*4882a593Smuzhiyun /*
2456*4882a593Smuzhiyun  * Deregister an MR after I/O is done.
2457*4882a593Smuzhiyun  * This function may wait if remote invalidation is not used
2458*4882a593Smuzhiyun  * and we have to locally invalidate the buffer to prevent the data from
2459*4882a593Smuzhiyun  * being modified by the remote peer after the upper layer consumes it.
2460*4882a593Smuzhiyun  */
2461*4882a593Smuzhiyun int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
2462*4882a593Smuzhiyun {
2463*4882a593Smuzhiyun 	struct ib_send_wr *wr;
2464*4882a593Smuzhiyun 	struct smbd_connection *info = smbdirect_mr->conn;
2465*4882a593Smuzhiyun 	int rc = 0;
2466*4882a593Smuzhiyun 
2467*4882a593Smuzhiyun 	if (smbdirect_mr->need_invalidate) {
2468*4882a593Smuzhiyun 		/* Need to finish local invalidation before returning */
2469*4882a593Smuzhiyun 		wr = &smbdirect_mr->inv_wr;
2470*4882a593Smuzhiyun 		wr->opcode = IB_WR_LOCAL_INV;
2471*4882a593Smuzhiyun 		smbdirect_mr->cqe.done = local_inv_done;
2472*4882a593Smuzhiyun 		wr->wr_cqe = &smbdirect_mr->cqe;
2473*4882a593Smuzhiyun 		wr->num_sge = 0;
2474*4882a593Smuzhiyun 		wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
2475*4882a593Smuzhiyun 		wr->send_flags = IB_SEND_SIGNALED;
2476*4882a593Smuzhiyun 
2477*4882a593Smuzhiyun 		init_completion(&smbdirect_mr->invalidate_done);
2478*4882a593Smuzhiyun 		rc = ib_post_send(info->id->qp, wr, NULL);
2479*4882a593Smuzhiyun 		if (rc) {
2480*4882a593Smuzhiyun 			log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
2481*4882a593Smuzhiyun 			smbd_disconnect_rdma_connection(info);
2482*4882a593Smuzhiyun 			goto done;
2483*4882a593Smuzhiyun 		}
2484*4882a593Smuzhiyun 		wait_for_completion(&smbdirect_mr->invalidate_done);
2485*4882a593Smuzhiyun 		smbdirect_mr->need_invalidate = false;
2486*4882a593Smuzhiyun 	} else
2487*4882a593Smuzhiyun 		/*
2488*4882a593Smuzhiyun 		 * For remote invalidation, just set it to MR_INVALIDATED
2489*4882a593Smuzhiyun 		 * and defer to mr_recovery_work to recover the MR for next use
2490*4882a593Smuzhiyun 		 */
2491*4882a593Smuzhiyun 		smbdirect_mr->state = MR_INVALIDATED;
2492*4882a593Smuzhiyun 
2493*4882a593Smuzhiyun 	if (smbdirect_mr->state == MR_INVALIDATED) {
2494*4882a593Smuzhiyun 		ib_dma_unmap_sg(
2495*4882a593Smuzhiyun 			info->id->device, smbdirect_mr->sgl,
2496*4882a593Smuzhiyun 			smbdirect_mr->sgl_count,
2497*4882a593Smuzhiyun 			smbdirect_mr->dir);
2498*4882a593Smuzhiyun 		smbdirect_mr->state = MR_READY;
2499*4882a593Smuzhiyun 		if (atomic_inc_return(&info->mr_ready_count) == 1)
2500*4882a593Smuzhiyun 			wake_up_interruptible(&info->wait_mr);
2501*4882a593Smuzhiyun 	} else
2502*4882a593Smuzhiyun 		/*
2503*4882a593Smuzhiyun 		 * Schedule the work to do MR recovery for future I/Os. MR
2504*4882a593Smuzhiyun 		 * recovery is slow and we don't want it to block the current I/O.
2505*4882a593Smuzhiyun 		 */
2506*4882a593Smuzhiyun 		queue_work(info->workqueue, &info->mr_recovery_work);
2507*4882a593Smuzhiyun 
2508*4882a593Smuzhiyun done:
2509*4882a593Smuzhiyun 	if (atomic_dec_and_test(&info->mr_used_count))
2510*4882a593Smuzhiyun 		wake_up(&info->wait_for_mr_cleanup);
2511*4882a593Smuzhiyun 
2512*4882a593Smuzhiyun 	return rc;
2513*4882a593Smuzhiyun }
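/*
 * Hedged sketch, not part of the original revision: how a caller in the SMB
 * layer is expected to pair smbd_register_mr() with smbd_deregister_mr()
 * around one direct I/O.  The page array, the counts and example_issue_io()
 * are hypothetical placeholders.
 */
#if 0	/* illustration only, never compiled */
static int example_rdma_write_io(struct smbd_connection *info,
				 struct page *pages[], int npages,
				 int offset, int tailsz)
{
	struct smbd_mr *mr;
	int rc;

	/* writing=true: the server RDMA-writes the SMB read data into pages */
	mr = smbd_register_mr(info, pages, npages, offset, tailsz,
			      true /* writing */, true /* need_invalidate */);
	if (!mr)
		return -EAGAIN;

	/* send the request carrying mr->mr->rkey and wait for the response */
	rc = example_issue_io(info, mr);

	/* invalidate and release the MR before the pages are reused */
	smbd_deregister_mr(mr);
	return rc;
}
#endif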
2514