// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
 */

#include <linux/types.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <net/sock.h>

#include "vmci_transport_notify.h"

#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)

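/* Decide whether the peer, which has told us it is waiting to write, should
 * be sent a READ notification now. With VSOCK_OPTIMIZATION_FLOW_CONTROL the
 * write_notify_window shrinks by a page each time the peer is seen blocking,
 * so the notification is held back until the unread bytes in the consume
 * queue drop below the window. Illustrative numbers (not taken from the
 * code): with a 64 KiB consume queue and a 4 KiB window, notify_limit is
 * 60 KiB, so a READ is sent only once more than 60 KiB of the queue is free,
 * i.e. fewer than 4 KiB remain unread.
 */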
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        bool retval;
        u64 notify_limit;

        if (!PKT_FIELD(vsk, peer_waiting_write))
                return false;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
        /* When the sender blocks, we take that as a sign that the sender is
         * faster than the receiver. To reduce the transmit rate of the sender,
         * we delay the sending of the read notification by decreasing the
         * write_notify_window. The notification is delayed until the number of
         * bytes used in the queue drops below the write_notify_window.
         */

        if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
                PKT_FIELD(vsk, peer_waiting_write_detected) = true;
                if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
                        PKT_FIELD(vsk, write_notify_window) =
                                PKT_FIELD(vsk, write_notify_min_window);
                } else {
                        PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
                        if (PKT_FIELD(vsk, write_notify_window) <
                            PKT_FIELD(vsk, write_notify_min_window))
                                PKT_FIELD(vsk, write_notify_window) =
                                        PKT_FIELD(vsk, write_notify_min_window);
                }
        }
        notify_limit = vmci_trans(vsk)->consume_size -
                PKT_FIELD(vsk, write_notify_window);
#else
        notify_limit = 0;
#endif

        /* For now we ignore the wait information and just see if the free
         * space exceeds the notify limit. Note that improving this function
         * to be more intelligent will not require a protocol change and will
         * retain compatibility between endpoints with mixed versions of this
         * function.
         *
         * The notify_limit is used to delay notifications in the case where
         * flow control is enabled. Below the test is expressed in terms of
         * free space in the queue: if free_space > ConsumeSize -
         * write_notify_window then notify. An alternate way of expressing
         * this is to rewrite the expression to use the data ready in the
         * receive queue: if write_notify_window > bufferReady then notify,
         * as free_space == ConsumeSize - bufferReady.
         */
        retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
                notify_limit;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
        if (retval) {
                /* Once we notify the peer, we reset the detected flag so the
                 * next wait will again cause a decrease in the window size.
                 */

                PKT_FIELD(vsk, peer_waiting_write_detected) = false;
        }
#endif
        return retval;
#else
        return true;
#endif
}

static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        if (!PKT_FIELD(vsk, peer_waiting_read))
                return false;

        /* For now we ignore the wait information and just see if there is any
         * data for our peer to read. Note that improving this function to be
         * more intelligent will not require a protocol change and will retain
         * compatibility between endpoints with mixed versions of this
         * function.
         */
        return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
#else
        return true;
#endif
}

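/* Handle a WAITING_READ control packet: remember that the peer is blocked
 * waiting for data and keep a copy of its wait info, then answer with a
 * WROTE notification straight away if our produce queue already holds data.
 * In bottom-half context the address-based _bh sender is used instead of the
 * socket-based one.
 */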
static void
vmci_transport_handle_waiting_read(struct sock *sk,
                                   struct vmci_transport_packet *pkt,
                                   bool bottom_half,
                                   struct sockaddr_vm *dst,
                                   struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        struct vsock_sock *vsk;

        vsk = vsock_sk(sk);

        PKT_FIELD(vsk, peer_waiting_read) = true;
        memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
               sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));

        if (vmci_transport_notify_waiting_read(vsk)) {
                bool sent;

                if (bottom_half)
                        sent = vmci_transport_send_wrote_bh(dst, src) > 0;
                else
                        sent = vmci_transport_send_wrote(sk) > 0;

                if (sent)
                        PKT_FIELD(vsk, peer_waiting_read) = false;
        }
#endif
}

static void
vmci_transport_handle_waiting_write(struct sock *sk,
                                    struct vmci_transport_packet *pkt,
                                    bool bottom_half,
                                    struct sockaddr_vm *dst,
                                    struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        struct vsock_sock *vsk;

        vsk = vsock_sk(sk);

        PKT_FIELD(vsk, peer_waiting_write) = true;
        memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
               sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));

        if (vmci_transport_notify_waiting_write(vsk)) {
                bool sent;

                if (bottom_half)
                        sent = vmci_transport_send_read_bh(dst, src) > 0;
                else
                        sent = vmci_transport_send_read(sk) > 0;

                if (sent)
                        PKT_FIELD(vsk, peer_waiting_write) = false;
        }
#endif
}

static void
vmci_transport_handle_read(struct sock *sk,
                           struct vmci_transport_packet *pkt,
                           bool bottom_half,
                           struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        struct vsock_sock *vsk;

        vsk = vsock_sk(sk);
        PKT_FIELD(vsk, sent_waiting_write) = false;
#endif

        sk->sk_write_space(sk);
}

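/* Tell the peer we are waiting to read. The wait info carries the offset in
 * the consume queue that the peer has to have written up to before
 * room_needed bytes are available, together with the queue generation so the
 * peer can tell whether that offset lies in the current pass over the ring
 * or, when head + room_needed runs past the end of the queue, in the next
 * one. The notify window is also widened by a page here, undoing the
 * shrinking done in vmci_transport_notify_waiting_write(). Sent at most once
 * until the peer answers with a WROTE packet (see
 * vmci_transport_handle_wrote()).
 */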
static bool send_waiting_read(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        struct vsock_sock *vsk;
        struct vmci_transport_waiting_info waiting_info;
        u64 tail;
        u64 head;
        u64 room_left;
        bool ret;

        vsk = vsock_sk(sk);

        if (PKT_FIELD(vsk, sent_waiting_read))
                return true;

        if (PKT_FIELD(vsk, write_notify_window) <
            vmci_trans(vsk)->consume_size)
                PKT_FIELD(vsk, write_notify_window) =
                    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
                        vmci_trans(vsk)->consume_size);

        vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
        room_left = vmci_trans(vsk)->consume_size - head;
        if (room_needed >= room_left) {
                waiting_info.offset = room_needed - room_left;
                waiting_info.generation =
                    PKT_FIELD(vsk, consume_q_generation) + 1;
        } else {
                waiting_info.offset = head + room_needed;
                waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
        }

        ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
        if (ret)
                PKT_FIELD(vsk, sent_waiting_read) = true;

        return ret;
#else
        return true;
#endif
}

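/* Counterpart of send_waiting_read(): tell the peer we are waiting for room
 * to write. The offset is based on the produce tail plus room_needed + 1
 * bytes; when that runs past the end of the produce queue the wait info is
 * tagged with the current produce_q_generation, otherwise with the previous
 * one. Sent at most once until the peer answers with a READ packet, which
 * clears sent_waiting_write in vmci_transport_handle_read().
 */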
static bool send_waiting_write(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        struct vsock_sock *vsk;
        struct vmci_transport_waiting_info waiting_info;
        u64 tail;
        u64 head;
        u64 room_left;
        bool ret;

        vsk = vsock_sk(sk);

        if (PKT_FIELD(vsk, sent_waiting_write))
                return true;

        vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
        room_left = vmci_trans(vsk)->produce_size - tail;
        if (room_needed + 1 >= room_left) {
                /* Wraps around to current generation. */
                waiting_info.offset = room_needed + 1 - room_left;
                waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
        } else {
                waiting_info.offset = tail + room_needed + 1;
                waiting_info.generation =
                        PKT_FIELD(vsk, produce_q_generation) - 1;
        }

        ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
        if (ret)
                PKT_FIELD(vsk, sent_waiting_write) = true;

        return ret;
#else
        return true;
#endif
}

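/* Send a READ notification after data has been consumed, but only when
 * vmci_transport_notify_waiting_write() says the peer is actually waiting
 * and enough space has opened up. The datagram is retried up to
 * VMCI_TRANSPORT_MAX_DGRAM_RESENDS times unless the peer has shut down its
 * receive side; peer_waiting_write is cleared unless every retry failed.
 */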
static int vmci_transport_send_read_notification(struct sock *sk)
{
        struct vsock_sock *vsk;
        bool sent_read;
        unsigned int retries;
        int err;

        vsk = vsock_sk(sk);
        sent_read = false;
        retries = 0;
        err = 0;

        if (vmci_transport_notify_waiting_write(vsk)) {
                /* Notify the peer that we have read, retrying the send on
                 * failure up to our maximum value. XXX For now we just log
                 * the failure, but later we should schedule a work item to
                 * handle the resend until it succeeds. That would require
                 * keeping track of work items in the vsk and cleaning them up
                 * upon socket close.
                 */
                while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
                       !sent_read &&
                       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
                        err = vmci_transport_send_read(sk);
                        if (err >= 0)
                                sent_read = true;

                        retries++;
                }

                if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
                        pr_err("%p unable to send read notify to peer\n", sk);
                else
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
                        PKT_FIELD(vsk, peer_waiting_write) = false;
#endif

        }
        return err;
}

static void
vmci_transport_handle_wrote(struct sock *sk,
                            struct vmci_transport_packet *pkt,
                            bool bottom_half,
                            struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        struct vsock_sock *vsk = vsock_sk(sk);

        PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
        sk->sk_data_ready(sk);
}

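/* Per-socket notify state starts with a one page notify window and a one
 * page minimum window; the peer-waiting flags, sent-waiting flags, queue
 * generation counters and saved wait info all start out cleared.
 */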
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
{
        struct vsock_sock *vsk = vsock_sk(sk);

        PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
        PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
        PKT_FIELD(vsk, peer_waiting_read) = false;
        PKT_FIELD(vsk, peer_waiting_write) = false;
        PKT_FIELD(vsk, peer_waiting_write_detected) = false;
        PKT_FIELD(vsk, sent_waiting_read) = false;
        PKT_FIELD(vsk, sent_waiting_write) = false;
        PKT_FIELD(vsk, produce_q_generation) = 0;
        PKT_FIELD(vsk, consume_q_generation) = 0;

        memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
               sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
        memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
               sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
}

static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
}

static int
vmci_transport_notify_pkt_poll_in(struct sock *sk,
                                  size_t target, bool *data_ready_now)
{
        struct vsock_sock *vsk = vsock_sk(sk);

        if (vsock_stream_has_data(vsk)) {
                *data_ready_now = true;
        } else {
                /* We can't read right now because there is nothing in the
                 * queue. Ask for notifications when there is something to
                 * read.
                 */
                if (sk->sk_state == TCP_ESTABLISHED) {
                        if (!send_waiting_read(sk, 1))
                                return -1;
                }
                *data_ready_now = false;
        }

        return 0;
}

static int
vmci_transport_notify_pkt_poll_out(struct sock *sk,
                                   size_t target, bool *space_avail_now)
{
        s64 produce_q_free_space;
        struct vsock_sock *vsk = vsock_sk(sk);

        produce_q_free_space = vsock_stream_has_space(vsk);
        if (produce_q_free_space > 0) {
                *space_avail_now = true;
                return 0;
        } else if (produce_q_free_space == 0) {
                /* This is a connected socket but we can't currently send data.
                 * Notify the peer that we are waiting if the queue is full. We
                 * only send a waiting write if the queue is full because
                 * otherwise we end up in an infinite WAITING_WRITE, READ,
                 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
                 * notification as a socket error, passing that back through
                 * the mask.
                 */
                if (!send_waiting_write(sk, 1))
                        return -1;

                *space_avail_now = false;
        }

        return 0;
}

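/* Called when a receive starts. With flow control enabled, make sure the
 * minimum notify window covers at least target + 1 bytes; if the current
 * window had already been shrunk below that, widen it again and remember,
 * via notify_on_block, to send a READ notification before blocking, since
 * the sender may still be throttled by the old, smaller window.
 */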
static int
vmci_transport_notify_pkt_recv_init(
                        struct sock *sk,
                        size_t target,
                        struct vmci_transport_recv_notify_data *data)
{
        struct vsock_sock *vsk = vsock_sk(sk);

#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
        data->consume_head = 0;
        data->produce_tail = 0;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
        data->notify_on_block = false;

        if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
                PKT_FIELD(vsk, write_notify_min_window) = target + 1;
                if (PKT_FIELD(vsk, write_notify_window) <
                    PKT_FIELD(vsk, write_notify_min_window)) {
                        /* If the current window is smaller than the new
                         * minimal window size, we need to reevaluate whether
                         * we need to notify the sender. If the number of ready
                         * bytes is smaller than the new window, we need to
                         * send a notification to the sender before we block.
                         */

                        PKT_FIELD(vsk, write_notify_window) =
                                PKT_FIELD(vsk, write_notify_min_window);
                        data->notify_on_block = true;
                }
        }
#endif
#endif

        return 0;
}

static int
vmci_transport_notify_pkt_recv_pre_block(
                        struct sock *sk,
                        size_t target,
                        struct vmci_transport_recv_notify_data *data)
{
        int err = 0;

        /* Notify our peer that we are waiting for data to read. */
        if (!send_waiting_read(sk, target)) {
                err = -EHOSTUNREACH;
                return err;
        }
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
        if (data->notify_on_block) {
                err = vmci_transport_send_read_notification(sk);
                if (err < 0)
                        return err;

                data->notify_on_block = false;
        }
#endif

        return err;
}

static int
vmci_transport_notify_pkt_recv_pre_dequeue(
                        struct sock *sk,
                        size_t target,
                        struct vmci_transport_recv_notify_data *data)
{
        struct vsock_sock *vsk = vsock_sk(sk);

        /* Now consume up to len bytes from the queue. Note that since we have
         * the socket locked we should copy at least ready bytes.
         */
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
                                       &data->produce_tail,
                                       &data->consume_head);
#endif

        return 0;
}

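/* Runs after data has been copied out of the consume queue. If the read
 * crossed the end of the ring (copied >= consume_size - consume_head, with
 * consume_head sampled in the pre-dequeue hook), bump consume_q_generation,
 * then send the READ notification so a blocked writer can make progress.
 */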
static int
vmci_transport_notify_pkt_recv_post_dequeue(
                        struct sock *sk,
                        size_t target,
                        ssize_t copied,
                        bool data_read,
                        struct vmci_transport_recv_notify_data *data)
{
        struct vsock_sock *vsk;
        int err;

        vsk = vsock_sk(sk);
        err = 0;

        if (data_read) {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
                /* Detect a wrap-around to maintain queue generation. Note
                 * that this is safe since we hold the socket lock across the
                 * two queue pair operations.
                 */
                if (copied >=
                    vmci_trans(vsk)->consume_size - data->consume_head)
                        PKT_FIELD(vsk, consume_q_generation)++;
#endif

                err = vmci_transport_send_read_notification(sk);
                if (err < 0)
                        return err;
        }
        return err;
}

static int
vmci_transport_notify_pkt_send_init(
                        struct sock *sk,
                        struct vmci_transport_send_notify_data *data)
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
        data->consume_head = 0;
        data->produce_tail = 0;
#endif

        return 0;
}

static int
vmci_transport_notify_pkt_send_pre_block(
                        struct sock *sk,
                        struct vmci_transport_send_notify_data *data)
{
        /* Notify our peer that we are waiting for room to write. */
        if (!send_waiting_write(sk, 1))
                return -EHOSTUNREACH;

        return 0;
}

static int
vmci_transport_notify_pkt_send_pre_enqueue(
                        struct sock *sk,
                        struct vmci_transport_send_notify_data *data)
{
        struct vsock_sock *vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
                                       &data->produce_tail,
                                       &data->consume_head);
#endif

        return 0;
}

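/* Runs after data has been written into the produce queue. Bump
 * produce_q_generation if the write wrapped the ring, then, if the peer said
 * it is waiting to read, send a WROTE notification, retrying the datagram up
 * to VMCI_TRANSPORT_MAX_DGRAM_RESENDS times; peer_waiting_read is cleared
 * unless every retry failed.
 */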
static int
vmci_transport_notify_pkt_send_post_enqueue(
                        struct sock *sk,
                        ssize_t written,
                        struct vmci_transport_send_notify_data *data)
{
        int err = 0;
        struct vsock_sock *vsk;
        bool sent_wrote = false;
        int retries = 0;

        vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
        /* Detect a wrap-around to maintain queue generation. Note that this
         * is safe since we hold the socket lock across the two queue pair
         * operations.
         */
        if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
                PKT_FIELD(vsk, produce_q_generation)++;

#endif

        if (vmci_transport_notify_waiting_read(vsk)) {
                /* Notify the peer that we have written, retrying the send on
                 * failure up to our maximum value. See the XXX comment for the
                 * corresponding piece of code in StreamRecvmsg() for potential
                 * improvements.
                 */
                while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
                       !sent_wrote &&
                       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
                        err = vmci_transport_send_wrote(sk);
                        if (err >= 0)
                                sent_wrote = true;

                        retries++;
                }

                if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
                        pr_err("%p unable to send wrote notify to peer\n", sk);
                        return err;
                } else {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
                        PKT_FIELD(vsk, peer_waiting_read) = false;
#endif
                }
        }
        return err;
}

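/* Dispatch the four notification control packets used by this protocol:
 * WROTE and READ wake up readers and writers on this socket, while
 * WAITING_WRITE and WAITING_READ record the peer's wait state and may answer
 * immediately. *pkt_processed reports whether the packet was one of these.
 */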
static void
vmci_transport_notify_pkt_handle_pkt(
                        struct sock *sk,
                        struct vmci_transport_packet *pkt,
                        bool bottom_half,
                        struct sockaddr_vm *dst,
                        struct sockaddr_vm *src, bool *pkt_processed)
{
        bool processed = false;

        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
                vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
                processed = true;
                break;
        case VMCI_TRANSPORT_PACKET_TYPE_READ:
                vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
                processed = true;
                break;
        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
                vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
                                                    dst, src);
                processed = true;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
                vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
                                                   dst, src);
                processed = true;
                break;
        }

        if (pkt_processed)
                *pkt_processed = processed;
}

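/* Once the peer's queue pair size is known (when the connection request is
 * made or the size has been negotiated), the write notify window is opened
 * up to the full consume queue and the minimum window is clamped so it never
 * exceeds the queue size.
 */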
static void vmci_transport_notify_pkt_process_request(struct sock *sk)
{
        struct vsock_sock *vsk = vsock_sk(sk);

        PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
        if (vmci_trans(vsk)->consume_size <
            PKT_FIELD(vsk, write_notify_min_window))
                PKT_FIELD(vsk, write_notify_min_window) =
                        vmci_trans(vsk)->consume_size;
}

static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
{
        struct vsock_sock *vsk = vsock_sk(sk);

        PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
        if (vmci_trans(vsk)->consume_size <
            PKT_FIELD(vsk, write_notify_min_window))
                PKT_FIELD(vsk, write_notify_min_window) =
                        vmci_trans(vsk)->consume_size;
}

/* Socket control packet based operations. */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
        .socket_init = vmci_transport_notify_pkt_socket_init,
        .socket_destruct = vmci_transport_notify_pkt_socket_destruct,
        .poll_in = vmci_transport_notify_pkt_poll_in,
        .poll_out = vmci_transport_notify_pkt_poll_out,
        .handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
        .recv_init = vmci_transport_notify_pkt_recv_init,
        .recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
        .recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
        .recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
        .send_init = vmci_transport_notify_pkt_send_init,
        .send_pre_block = vmci_transport_notify_pkt_send_pre_block,
        .send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
        .send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
        .process_request = vmci_transport_notify_pkt_process_request,
        .process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};