xref: /OK3568_Linux_fs/kernel/net/mptcp/subflow.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /* Multipath TCP
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * Copyright (c) 2017 - 2019, Intel Corporation.
5*4882a593Smuzhiyun  */
6*4882a593Smuzhiyun 
7*4882a593Smuzhiyun #define pr_fmt(fmt) "MPTCP: " fmt
8*4882a593Smuzhiyun 
9*4882a593Smuzhiyun #include <linux/kernel.h>
10*4882a593Smuzhiyun #include <linux/module.h>
11*4882a593Smuzhiyun #include <linux/netdevice.h>
12*4882a593Smuzhiyun #include <crypto/algapi.h>
13*4882a593Smuzhiyun #include <crypto/sha.h>
14*4882a593Smuzhiyun #include <net/sock.h>
15*4882a593Smuzhiyun #include <net/inet_common.h>
16*4882a593Smuzhiyun #include <net/inet_hashtables.h>
17*4882a593Smuzhiyun #include <net/protocol.h>
18*4882a593Smuzhiyun #include <net/tcp.h>
19*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_MPTCP_IPV6)
20*4882a593Smuzhiyun #include <net/ip6_route.h>
21*4882a593Smuzhiyun #endif
22*4882a593Smuzhiyun #include <net/mptcp.h>
23*4882a593Smuzhiyun #include <uapi/linux/mptcp.h>
24*4882a593Smuzhiyun #include "protocol.h"
25*4882a593Smuzhiyun #include "mib.h"
26*4882a593Smuzhiyun 
/* Bump an MPTCP MIB counter in the netns that owns this request socket. */
static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	struct net *net = sock_net(req_to_sk(req));

	MPTCP_INC_STATS(net, field);
}
32*4882a593Smuzhiyun 
subflow_req_destructor(struct request_sock * req)33*4882a593Smuzhiyun static void subflow_req_destructor(struct request_sock *req)
34*4882a593Smuzhiyun {
35*4882a593Smuzhiyun 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	pr_debug("subflow_req=%p", subflow_req);
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun 	if (subflow_req->msk)
40*4882a593Smuzhiyun 		sock_put((struct sock *)subflow_req->msk);
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun 	mptcp_token_destroy_request(req);
43*4882a593Smuzhiyun 	tcp_request_sock_ops.destructor(req);
44*4882a593Smuzhiyun }
45*4882a593Smuzhiyun 
/* Compute the HMAC keyed with key1/key2 over both nonces.
 * The 8-byte message is the two nonces in network byte order.
 */
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 buf[8];

	put_unaligned_be32(nonce1, buf);
	put_unaligned_be32(nonce2, buf + 4);

	mptcp_crypto_hmac_sha(key1, key2, buf, sizeof(buf), hmac);
}
56*4882a593Smuzhiyun 
mptcp_can_accept_new_subflow(const struct mptcp_sock * msk)57*4882a593Smuzhiyun static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
58*4882a593Smuzhiyun {
59*4882a593Smuzhiyun 	return mptcp_is_fully_established((void *)msk) &&
60*4882a593Smuzhiyun 	       READ_ONCE(msk->pm.accept_subflow);
61*4882a593Smuzhiyun }
62*4882a593Smuzhiyun 
63*4882a593Smuzhiyun /* validate received token and create truncated hmac and nonce for SYN-ACK */
subflow_token_join_request(struct request_sock * req,const struct sk_buff * skb)64*4882a593Smuzhiyun static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
65*4882a593Smuzhiyun 						     const struct sk_buff *skb)
66*4882a593Smuzhiyun {
67*4882a593Smuzhiyun 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
68*4882a593Smuzhiyun 	u8 hmac[SHA256_DIGEST_SIZE];
69*4882a593Smuzhiyun 	struct mptcp_sock *msk;
70*4882a593Smuzhiyun 	int local_id;
71*4882a593Smuzhiyun 
72*4882a593Smuzhiyun 	msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token);
73*4882a593Smuzhiyun 	if (!msk) {
74*4882a593Smuzhiyun 		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
75*4882a593Smuzhiyun 		return NULL;
76*4882a593Smuzhiyun 	}
77*4882a593Smuzhiyun 
78*4882a593Smuzhiyun 	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
79*4882a593Smuzhiyun 	if (local_id < 0) {
80*4882a593Smuzhiyun 		sock_put((struct sock *)msk);
81*4882a593Smuzhiyun 		return NULL;
82*4882a593Smuzhiyun 	}
83*4882a593Smuzhiyun 	subflow_req->local_id = local_id;
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
86*4882a593Smuzhiyun 
87*4882a593Smuzhiyun 	subflow_generate_hmac(msk->local_key, msk->remote_key,
88*4882a593Smuzhiyun 			      subflow_req->local_nonce,
89*4882a593Smuzhiyun 			      subflow_req->remote_nonce, hmac);
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 	subflow_req->thmac = get_unaligned_be64(hmac);
92*4882a593Smuzhiyun 	return msk;
93*4882a593Smuzhiyun }
94*4882a593Smuzhiyun 
/* Common request-sock initialization shared by the regular and the
 * syncookie paths: clear all MPTCP state carried by the request.
 *
 * Returns 0 on success, -EINVAL when MPTCP must not be negotiated on
 * this listener.
 */
static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	return 0;
}
114*4882a593Smuzhiyun 
/* Parse the MPTCP options carried by the SYN and set up the request
 * sock accordingly: MP_CAPABLE (new connection) or MP_JOIN (additional
 * subflow).  On any failure the request silently degrades to plain TCP.
 */
static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int ret;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	ret = __subflow_init_req(req, sk_listener);
	if (ret)
		return;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		/* MP_CAPABLE and MP_JOIN together is invalid: stay plain TCP */
		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		/* the zero key is reserved: keep drawing until non-zero */
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			/* syncookie path: derive the token now and only check
			 * for collisions, retrying with a fresh key on clash
			 */
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
			} else {
				subflow_req->mp_capable = 1;
			}
			return;
		}

		/* token insertion can fail on collision: retry a few times
		 * with a fresh local key before giving up on MPTCP
		 */
		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;

	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		/* on success this stores a referenced msk pointer; the
		 * reference is dropped by subflow_req_destructor() unless
		 * ownership moves to the child socket
		 */
		subflow_req->msk = subflow_token_join_request(req, skb);

		/* for syncookies, save the state needed to resume the
		 * MP_JOIN handshake when the cookie comes back
		 */
		if (unlikely(req->syncookie) && subflow_req->msk) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}
187*4882a593Smuzhiyun 
/* Initialize an MPTCP request sock rebuilt from a valid syncookie.
 *
 * Returns 0 on success (including a silent downgrade to plain TCP when
 * the listener is not requesting MPTCP) and a negative error to drop
 * the request.
 */
int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int err;

	err = __subflow_init_req(req, sk_listener);
	if (err)
		return err;

	mptcp_get_options(skb, &mp_opt);

	/* MP_CAPABLE and MP_JOIN are mutually exclusive */
	if (mp_opt.mp_capable && mp_opt.mp_join)
		return -EINVAL;

	if (mp_opt.mp_capable && listener->request_mptcp) {
		/* a zero key is never valid */
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		/* NOTE(review): the skb here appears to be the third ACK
		 * rather than the SYN, hence the -1 offset — confirm against
		 * the cookie code path
		 */
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		if (mptcp_can_accept_new_subflow(subflow_req->msk))
			subflow_req->mp_join = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);
230*4882a593Smuzhiyun 
/* init_req hook for IPv4 subflow request socks: mark the request as
 * MPTCP-aware, chain to the plain TCP IPv4 hook, then parse the MPTCP
 * options.
 */
static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* init_req hook for IPv6 subflow request socks; IPv6 twin of
 * subflow_v4_init_req().
 */
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
253*4882a593Smuzhiyun #endif
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun /* validate received truncated hmac and create hmac for third ACK */
subflow_thmac_valid(struct mptcp_subflow_context * subflow)256*4882a593Smuzhiyun static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun 	u8 hmac[SHA256_DIGEST_SIZE];
259*4882a593Smuzhiyun 	u64 thmac;
260*4882a593Smuzhiyun 
261*4882a593Smuzhiyun 	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
262*4882a593Smuzhiyun 			      subflow->remote_nonce, subflow->local_nonce,
263*4882a593Smuzhiyun 			      hmac);
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	thmac = get_unaligned_be64(hmac);
266*4882a593Smuzhiyun 	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
267*4882a593Smuzhiyun 		 subflow, subflow->token,
268*4882a593Smuzhiyun 		 (unsigned long long)thmac,
269*4882a593Smuzhiyun 		 (unsigned long long)subflow->thmac);
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 	return thmac == subflow->thmac;
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun 
/* Abort the subflow: move it to TCP_CLOSE, send a RST to the peer and
 * hand the dangling subflow over to the msk worker for cleanup.
 */
void mptcp_subflow_reset(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	tcp_done(ssk);
	/* schedule the worker at most once per pending close event; the
	 * extra msk reference taken here is owned (and released) by the
	 * worker
	 */
	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
	    schedule_work(&mptcp_sk(sk)->work))
		sock_hold(sk);
}
286*4882a593Smuzhiyun 
/* Complete the client-side handshake on SYN-ACK reception: finish the
 * MP_CAPABLE or MP_JOIN exchange, or fall back to plain TCP.  Runs for
 * every incoming packet while connecting, but acts only once
 * (conn_finished gates re-entry).
 */
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	/* propagate the subflow's transition to the parent msk socket */
	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		/* peer did not echo MP_CAPABLE: fall back to plain TCP */
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		/* an MP_JOIN SYN-ACK must carry the MP_JOIN option */
		if (!mp_opt.mp_join)
			goto do_reset;

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		/* reset the subflow when the peer's truncated HMAC fails */
		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			goto do_reset;
		}

		if (!mptcp_finish_join(sk))
			goto do_reset;

		/* compute our own HMAC, to be carried by the third ACK */
		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	mptcp_subflow_reset(sk);
}
361*4882a593Smuzhiyun 
362*4882a593Smuzhiyun struct request_sock_ops mptcp_subflow_request_sock_ops;
363*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
364*4882a593Smuzhiyun static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
365*4882a593Smuzhiyun 
subflow_v4_conn_request(struct sock * sk,struct sk_buff * skb)366*4882a593Smuzhiyun static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
367*4882a593Smuzhiyun {
368*4882a593Smuzhiyun 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
369*4882a593Smuzhiyun 
370*4882a593Smuzhiyun 	pr_debug("subflow=%p", subflow);
371*4882a593Smuzhiyun 
372*4882a593Smuzhiyun 	/* Never answer to SYNs sent to broadcast or multicast */
373*4882a593Smuzhiyun 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
374*4882a593Smuzhiyun 		goto drop;
375*4882a593Smuzhiyun 
376*4882a593Smuzhiyun 	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
377*4882a593Smuzhiyun 				&subflow_request_sock_ipv4_ops,
378*4882a593Smuzhiyun 				sk, skb);
379*4882a593Smuzhiyun drop:
380*4882a593Smuzhiyun 	tcp_listendrop(sk);
381*4882a593Smuzhiyun 	return 0;
382*4882a593Smuzhiyun }
383*4882a593Smuzhiyun 
384*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* IPv6 counterparts of the request/af ops above; presumably populated
 * by the subflow init code elsewhere in this file
 */
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;
388*4882a593Smuzhiyun 
/* Handle an incoming IPv6 SYN on an MPTCP subflow listener.  Always
 * returns 0 on the drop paths (no reset is sent).
 */
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* v4-mapped traffic on a dual-stack listener: use the v4 path */
	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/* a v4-mapped source on a native v6 SYN is malformed */
	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
413*4882a593Smuzhiyun #endif
414*4882a593Smuzhiyun 
415*4882a593Smuzhiyun /* validate hmac received in third ACK */
subflow_hmac_valid(const struct request_sock * req,const struct mptcp_options_received * mp_opt)416*4882a593Smuzhiyun static bool subflow_hmac_valid(const struct request_sock *req,
417*4882a593Smuzhiyun 			       const struct mptcp_options_received *mp_opt)
418*4882a593Smuzhiyun {
419*4882a593Smuzhiyun 	const struct mptcp_subflow_request_sock *subflow_req;
420*4882a593Smuzhiyun 	u8 hmac[SHA256_DIGEST_SIZE];
421*4882a593Smuzhiyun 	struct mptcp_sock *msk;
422*4882a593Smuzhiyun 
423*4882a593Smuzhiyun 	subflow_req = mptcp_subflow_rsk(req);
424*4882a593Smuzhiyun 	msk = subflow_req->msk;
425*4882a593Smuzhiyun 	if (!msk)
426*4882a593Smuzhiyun 		return false;
427*4882a593Smuzhiyun 
428*4882a593Smuzhiyun 	subflow_generate_hmac(msk->remote_key, msk->local_key,
429*4882a593Smuzhiyun 			      subflow_req->remote_nonce,
430*4882a593Smuzhiyun 			      subflow_req->local_nonce, hmac);
431*4882a593Smuzhiyun 
432*4882a593Smuzhiyun 	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
433*4882a593Smuzhiyun }
434*4882a593Smuzhiyun 
/* sk_destruct for an msk created by the subflow layer but never
 * accepted by user-space.
 */
static void mptcp_sock_destruct(struct sock *sk)
{
	/* if new mptcp socket isn't accepted, it is free'd
	 * from the tcp listener sockets request queue, linked
	 * from req->sk.  The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will be in
	 * ESTABLISHED state and will not have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */
	if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_destroy_common(mptcp_sk(sk));
	inet_sock_destruct(sk);
}
457*4882a593Smuzhiyun 
/* Dispose of a never-exposed msk: mark it closed and release it through
 * the common socket teardown path.
 */
static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}
463*4882a593Smuzhiyun 
/* Detach the MPTCP ULP state from @sk, turning it back into a plain TCP
 * socket; @old_ctx is the subflow context being abandoned (the caller
 * owns its disposal).
 */
static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}
474*4882a593Smuzhiyun 
/* Drop the subflow context of @ssk, if any: fall the socket back to
 * plain TCP, release the reference held on the owning msk and free the
 * context after an RCU grace period.
 */
static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}
488*4882a593Smuzhiyun 
/* Mark the subflow — and its owning msk — as fully established, storing
 * the key received from the peer.
 */
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	/* WRITE_ONCE pairs with lockless readers of msk->fully_established */
	WRITE_ONCE(msk->fully_established, true);
}
499*4882a593Smuzhiyun 
subflow_syn_recv_sock(const struct sock * sk,struct sk_buff * skb,struct request_sock * req,struct dst_entry * dst,struct request_sock * req_unhash,bool * own_req)500*4882a593Smuzhiyun static struct sock *subflow_syn_recv_sock(const struct sock *sk,
501*4882a593Smuzhiyun 					  struct sk_buff *skb,
502*4882a593Smuzhiyun 					  struct request_sock *req,
503*4882a593Smuzhiyun 					  struct dst_entry *dst,
504*4882a593Smuzhiyun 					  struct request_sock *req_unhash,
505*4882a593Smuzhiyun 					  bool *own_req)
506*4882a593Smuzhiyun {
507*4882a593Smuzhiyun 	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
508*4882a593Smuzhiyun 	struct mptcp_subflow_request_sock *subflow_req;
509*4882a593Smuzhiyun 	struct mptcp_options_received mp_opt;
510*4882a593Smuzhiyun 	bool fallback, fallback_is_fatal;
511*4882a593Smuzhiyun 	struct sock *new_msk = NULL;
512*4882a593Smuzhiyun 	struct sock *child;
513*4882a593Smuzhiyun 
514*4882a593Smuzhiyun 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
515*4882a593Smuzhiyun 
516*4882a593Smuzhiyun 	/* After child creation we must look for 'mp_capable' even when options
517*4882a593Smuzhiyun 	 * are not parsed
518*4882a593Smuzhiyun 	 */
519*4882a593Smuzhiyun 	mp_opt.mp_capable = 0;
520*4882a593Smuzhiyun 
521*4882a593Smuzhiyun 	/* hopefully temporary handling for MP_JOIN+syncookie */
522*4882a593Smuzhiyun 	subflow_req = mptcp_subflow_rsk(req);
523*4882a593Smuzhiyun 	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
524*4882a593Smuzhiyun 	fallback = !tcp_rsk(req)->is_mptcp;
525*4882a593Smuzhiyun 	if (fallback)
526*4882a593Smuzhiyun 		goto create_child;
527*4882a593Smuzhiyun 
528*4882a593Smuzhiyun 	/* if the sk is MP_CAPABLE, we try to fetch the client key */
529*4882a593Smuzhiyun 	if (subflow_req->mp_capable) {
530*4882a593Smuzhiyun 		/* we can receive and accept an in-window, out-of-order pkt,
531*4882a593Smuzhiyun 		 * which may not carry the MP_CAPABLE opt even on mptcp enabled
532*4882a593Smuzhiyun 		 * paths: always try to extract the peer key, and fallback
533*4882a593Smuzhiyun 		 * for packets missing it.
534*4882a593Smuzhiyun 		 * Even OoO DSS packets coming legitly after dropped or
535*4882a593Smuzhiyun 		 * reordered MPC will cause fallback, but we don't have other
536*4882a593Smuzhiyun 		 * options.
537*4882a593Smuzhiyun 		 */
538*4882a593Smuzhiyun 		mptcp_get_options(skb, &mp_opt);
539*4882a593Smuzhiyun 		if (!mp_opt.mp_capable) {
540*4882a593Smuzhiyun 			fallback = true;
541*4882a593Smuzhiyun 			goto create_child;
542*4882a593Smuzhiyun 		}
543*4882a593Smuzhiyun 
544*4882a593Smuzhiyun 		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
545*4882a593Smuzhiyun 		if (!new_msk)
546*4882a593Smuzhiyun 			fallback = true;
547*4882a593Smuzhiyun 	} else if (subflow_req->mp_join) {
548*4882a593Smuzhiyun 		mptcp_get_options(skb, &mp_opt);
549*4882a593Smuzhiyun 		if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
550*4882a593Smuzhiyun 		    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
551*4882a593Smuzhiyun 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
552*4882a593Smuzhiyun 			fallback = true;
553*4882a593Smuzhiyun 		}
554*4882a593Smuzhiyun 	}
555*4882a593Smuzhiyun 
556*4882a593Smuzhiyun create_child:
557*4882a593Smuzhiyun 	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
558*4882a593Smuzhiyun 						     req_unhash, own_req);
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun 	if (child && *own_req) {
561*4882a593Smuzhiyun 		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
562*4882a593Smuzhiyun 
563*4882a593Smuzhiyun 		tcp_rsk(req)->drop_req = false;
564*4882a593Smuzhiyun 
565*4882a593Smuzhiyun 		/* we need to fallback on ctx allocation failure and on pre-reqs
566*4882a593Smuzhiyun 		 * checking above. In the latter scenario we additionally need
567*4882a593Smuzhiyun 		 * to reset the context to non MPTCP status.
568*4882a593Smuzhiyun 		 */
569*4882a593Smuzhiyun 		if (!ctx || fallback) {
570*4882a593Smuzhiyun 			if (fallback_is_fatal)
571*4882a593Smuzhiyun 				goto dispose_child;
572*4882a593Smuzhiyun 
573*4882a593Smuzhiyun 			subflow_drop_ctx(child);
574*4882a593Smuzhiyun 			goto out;
575*4882a593Smuzhiyun 		}
576*4882a593Smuzhiyun 
577*4882a593Smuzhiyun 		if (ctx->mp_capable) {
578*4882a593Smuzhiyun 			/* this can't race with mptcp_close(), as the msk is
579*4882a593Smuzhiyun 			 * not yet exposted to user-space
580*4882a593Smuzhiyun 			 */
581*4882a593Smuzhiyun 			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
582*4882a593Smuzhiyun 
583*4882a593Smuzhiyun 			/* new mpc subflow takes ownership of the newly
584*4882a593Smuzhiyun 			 * created mptcp socket
585*4882a593Smuzhiyun 			 */
586*4882a593Smuzhiyun 			new_msk->sk_destruct = mptcp_sock_destruct;
587*4882a593Smuzhiyun 			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
588*4882a593Smuzhiyun 			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
589*4882a593Smuzhiyun 			ctx->conn = new_msk;
590*4882a593Smuzhiyun 			new_msk = NULL;
591*4882a593Smuzhiyun 
592*4882a593Smuzhiyun 			/* with OoO packets we can reach here without ingress
593*4882a593Smuzhiyun 			 * mpc option
594*4882a593Smuzhiyun 			 */
595*4882a593Smuzhiyun 			if (mp_opt.mp_capable)
596*4882a593Smuzhiyun 				mptcp_subflow_fully_established(ctx, &mp_opt);
597*4882a593Smuzhiyun 		} else if (ctx->mp_join) {
598*4882a593Smuzhiyun 			struct mptcp_sock *owner;
599*4882a593Smuzhiyun 
600*4882a593Smuzhiyun 			owner = subflow_req->msk;
601*4882a593Smuzhiyun 			if (!owner)
602*4882a593Smuzhiyun 				goto dispose_child;
603*4882a593Smuzhiyun 
604*4882a593Smuzhiyun 			/* move the msk reference ownership to the subflow */
605*4882a593Smuzhiyun 			subflow_req->msk = NULL;
606*4882a593Smuzhiyun 			ctx->conn = (struct sock *)owner;
607*4882a593Smuzhiyun 			if (!mptcp_finish_join(child))
608*4882a593Smuzhiyun 				goto dispose_child;
609*4882a593Smuzhiyun 
610*4882a593Smuzhiyun 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
611*4882a593Smuzhiyun 			tcp_rsk(req)->drop_req = true;
612*4882a593Smuzhiyun 		}
613*4882a593Smuzhiyun 	}
614*4882a593Smuzhiyun 
615*4882a593Smuzhiyun out:
616*4882a593Smuzhiyun 	/* dispose of the left over mptcp master, if any */
617*4882a593Smuzhiyun 	if (unlikely(new_msk))
618*4882a593Smuzhiyun 		mptcp_force_close(new_msk);
619*4882a593Smuzhiyun 
620*4882a593Smuzhiyun 	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
622*4882a593Smuzhiyun 	 */
623*4882a593Smuzhiyun 	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
624*4882a593Smuzhiyun 		     (!mptcp_subflow_ctx(child) ||
625*4882a593Smuzhiyun 		      !mptcp_subflow_ctx(child)->conn));
626*4882a593Smuzhiyun 	return child;
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun dispose_child:
629*4882a593Smuzhiyun 	subflow_drop_ctx(child);
630*4882a593Smuzhiyun 	tcp_rsk(req)->drop_req = true;
631*4882a593Smuzhiyun 	inet_csk_prepare_for_destroy_sock(child);
632*4882a593Smuzhiyun 	tcp_done(child);
633*4882a593Smuzhiyun 	req->rsk_ops->send_reset(sk, skb);
634*4882a593Smuzhiyun 
635*4882a593Smuzhiyun 	/* The last child reference will be released by the caller */
636*4882a593Smuzhiyun 	return child;
637*4882a593Smuzhiyun }
638*4882a593Smuzhiyun 
639*4882a593Smuzhiyun static struct inet_connection_sock_af_ops subflow_specific;
640*4882a593Smuzhiyun 
/* Result of get_mapping_status(): state of the DSS mapping for the skb
 * at the head of the subflow receive queue.
 */
enum mapping_status {
	MAPPING_OK,		/* a valid mapping covers the skb */
	MAPPING_INVALID,	/* protocol violation, caller resets the subflow */
	MAPPING_EMPTY,		/* no skb queued on the subflow */
	MAPPING_DATA_FIN,	/* mapping carries a bare DATA_FIN */
	MAPPING_DUMMY		/* subflow fell back to plain TCP, no DSS */
};
648*4882a593Smuzhiyun 
/* Expand a 32-bit data sequence number @seq to 64 bits, using the
 * previous mapping (@old_seq, @old_data_len) as the reference point.
 * If the low 32 bits already match the old sequence, reuse it as-is.
 */
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	u64 high_bits;

	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet: take the upper 32 bits
	 * from the sequence just past the end of the old mapping.
	 */
	high_bits = (old_seq + old_data_len + 1) & GENMASK_ULL(63, 32);
	return seq | high_bits;
}
657*4882a593Smuzhiyun 
/* Trace helper: log the current mapping when the subflow-relative
 * sequence number @ssn falls outside of it.
 */
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		 ssn, subflow->map_subflow_seq, subflow->map_data_len);
}
663*4882a593Smuzhiyun 
/* Return true if the not-yet-consumed part of @skb is entirely covered
 * by the remaining bytes of the current DSS mapping.
 */
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	/* bytes of this skb already consumed by the TCP stack */
	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}
676*4882a593Smuzhiyun 
/* Return true when the current read position (copied_seq translated to
 * subflow-relative space) falls inside the current DSS mapping.
 */
static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		dbg_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
				  subflow->map_data_len))) {
		/* Mapping only covers past subflow data, invalid */
		dbg_bad_map(subflow, ssn);
		return false;
	}
	return true;
}
697*4882a593Smuzhiyun 
/* Parse and validate the DSS mapping carried by the skb at the head of
 * the @ssk receive queue, updating the subflow map_* fields and the msk
 * DATA_FIN state as needed. Returns a mapping_status telling the caller
 * how to proceed.
 */
static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	/* after a fallback no DSS option is expected anymore */
	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack deliver 0 len FIN pkt to the receive
			 * queue, that is the only 0len pkts ever expected here,
			 * and we can admit no mapping only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		/* data with no mapping at all: protocol violation */
		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		/* 0-length mapping == infinite mapping, not supported */
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
								 mpext->dsn64);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				if (updated && schedule_work(&msk->work))
					sock_hold((struct sock *)msk);

				return MAPPING_DATA_FIN;
			}
		} else {
			/* DATA_FIN seq is the last byte of the mapping */
			u64 data_fin_seq = mpext->data_seq + data_len - 1;

			/* If mpext->data_seq is a 32-bit value, data_fin_seq
			 * must also be limited to 32 bits.
			 */
			if (!mpext->dsn64)
				data_fin_seq &= GENMASK_ULL(31, 0);

			mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
			pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
				 data_fin_seq, mpext->dsn64);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	/* expand 32-bit sequence numbers using the previous mapping */
	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
	}
	WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	/* install the new mapping on the subflow context */
	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate valid mapping on new skb, because we must ensure
	 * the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}
832*4882a593Smuzhiyun 
/* Drop up to @limit bytes, plus an eventual FIN, from the skb at the
 * head of @ssk's receive queue: at the MPTCP level that data is a
 * duplicate (spurious retransmission) and must not reach the msk.
 */
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
				       u64 limit)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
	u32 incr;

	/* when the whole skb is duplicate, also consume the FIN, if any */
	incr = limit >= skb->len ? skb->len + fin : limit;

	pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
		 subflow->map_subflow_seq);
	MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
	tcp_sk(ssk)->copied_seq += incr;
	/* drop the skb once fully consumed */
	if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
		sk_eat_skb(ssk, skb);
	/* invalidate the mapping once it has been fully consumed */
	if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
		subflow->map_valid = 0;
	/* let the TCP stack account for the consumed data; this may
	 * possibly send an ACK to the peer
	 */
	if (incr)
		tcp_cleanup_rbuf(ssk, incr);
}
853*4882a593Smuzhiyun 
/* Check whether the subflow receive queue head carries data usable at
 * the MPTCP level and update subflow->data_avail accordingly. Returns
 * true when in-order or out-of-order data is available; on a fatal
 * mapping error the subflow is reset and closed.
 */
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (!skb_peek(&ssk->sk_receive_queue))
		subflow->data_avail = 0;
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	/* loop, discarding duplicate data, until usable data is found or
	 * the queue is exhausted
	 */
	for (;;) {
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			/* fallback to plain TCP: synthesize a mapping
			 * covering the whole skb at the current msk-level
			 * sequence number
			 */
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack) {
			/* in-sequence data */
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			break;
		} else if (after64(ack_seq, old_ack)) {
			/* data ahead of the msk-level ack: out-of-order */
			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
			break;
		}

		/* only accept in-sequence mapping. Old values are spurious
		 * retransmission
		 */
		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	subflow->data_avail = 0;
	return false;
}
940*4882a593Smuzhiyun 
/* Entry point used by the MPTCP level to check/refresh the subflow rx
 * data status; clears the current mapping once it is fully consumed,
 * then re-evaluates data availability.
 */
bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	return subflow_check_data_avail(sk);
}
958*4882a593Smuzhiyun 
/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored,
 * as far as mptcp peer is concerned that data is still inflight.
 * DSS ACK is updated when skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	/* report the msk-level buffer space instead of the ssk one */
	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}
976*4882a593Smuzhiyun 
/* ->sk_data_ready() callback for subflow sockets: for listener sockets
 * wake up the msk accept path, otherwise propagate the data-available
 * event to the MPTCP parent socket.
 */
static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		/* MPJ subflow are removed from accept queue before reaching here,
		 * avoid stray wakeups
		 */
		if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
			return;

		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	/* non-fallback subflows must be either MP_CAPABLE or MP_JOIN,
	 * unless already closed
	 */
	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}
1003*4882a593Smuzhiyun 
/* ->sk_write_space() callback: when both the subflow and the MPTCP
 * parent are writeable, flag send space at the msk level and wake up
 * writers blocked on the parent socket.
 */
static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	if (!sk_stream_is_writeable(sk))
		return;

	if (sk_stream_is_writeable(parent)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}
1019*4882a593Smuzhiyun 
/* Select the default (non v4-mapped) af_ops matching the address
 * family of @sk.
 */
static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}
1029*4882a593Smuzhiyun 
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* Switch the icsk af_ops of @sk between the plain family-specific ones
 * and the v4-mapped-on-v6 variant, saving the previous ops pointer in
 * the subflow context so it can be restored later.
 */
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	/* nothing to do when already using the requested ops */
	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif
1049*4882a593Smuzhiyun 
/* Convert an mptcp_addr_info into a sockaddr_storage suitable for the
 * in-kernel socket API (kernel_bind()/kernel_connect()).
 */
static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}
1070*4882a593Smuzhiyun 
/* Create, bind and connect an additional (MP_JOIN) TCP subflow from
 * the local address @loc towards @remote, on behalf of the MPTCP
 * socket @sk. On success the subflow is added to the msk join_list and
 * the kernel_connect() status is returned (possibly -EINPROGRESS for a
 * still in-flight connection); a negative error code otherwise.
 */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int remote_id = remote->id;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	/* pick a non-zero random nonce, presumably used for the MP_JOIN
	 * handshake HMAC — computed elsewhere, not visible here
	 */
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	/* ask the path manager for an address id when none was given */
	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = loc->ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
		 remote_token, local_id, remote_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->remote_id = remote_id;
	subflow->request_join = 1;
	subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	mptcp_info2sockaddr(remote, &addr);

	/* non-blocking connect: -EINPROGRESS is the expected outcome */
	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}
1145*4882a593Smuzhiyun 
/* Create a kernel TCP socket with the "mptcp" ULP attached, to be used
 * as a subflow of the MPTCP socket @sk. On success *new_sock holds the
 * new socket, whose subflow context owns a reference to @sk.
 */
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	/* un-accepted server sockets can reach here - on bad configuration
	 * bail early to avoid greater trouble later
	 */
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	/* the subflow context holds a reference on the msk */
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}
1201*4882a593Smuzhiyun 
/* Allocate a subflow context for @sk and attach it to the socket's ULP
 * data pointer. Returns NULL on allocation failure.
 */
static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}
1221*4882a593Smuzhiyun 
/* Wake up every process sleeping on the subflow socket wait queue */
static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}
1232*4882a593Smuzhiyun 
subflow_is_done(const struct sock * sk)1233*4882a593Smuzhiyun static bool subflow_is_done(const struct sock *sk)
1234*4882a593Smuzhiyun {
1235*4882a593Smuzhiyun 	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
1236*4882a593Smuzhiyun }
1237*4882a593Smuzhiyun 
/* ->sk_state_change() callback for subflow sockets: handles fallback
 * on simultaneous connect and propagates data-available and EOF events
 * to the MPTCP parent socket.
 */
static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	/* simultaneous connect implies no MPTCP handshake completion:
	 * fall back to plain TCP and mark the msk established
	 */
	if (subflow_simultaneous_connect(sk)) {
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

	/* as recvmsg() does not acquire the subflow socket for ssk selection
	 * a fin packet carrying a DSS can be unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	/* on fallback, signal EOF to the msk once this subflow is done */
	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
	    !(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}
1270*4882a593Smuzhiyun 
subflow_ulp_init(struct sock * sk)1271*4882a593Smuzhiyun static int subflow_ulp_init(struct sock *sk)
1272*4882a593Smuzhiyun {
1273*4882a593Smuzhiyun 	struct inet_connection_sock *icsk = inet_csk(sk);
1274*4882a593Smuzhiyun 	struct mptcp_subflow_context *ctx;
1275*4882a593Smuzhiyun 	struct tcp_sock *tp = tcp_sk(sk);
1276*4882a593Smuzhiyun 	int err = 0;
1277*4882a593Smuzhiyun 
1278*4882a593Smuzhiyun 	/* disallow attaching ULP to a socket unless it has been
1279*4882a593Smuzhiyun 	 * created with sock_create_kern()
1280*4882a593Smuzhiyun 	 */
1281*4882a593Smuzhiyun 	if (!sk->sk_kern_sock) {
1282*4882a593Smuzhiyun 		err = -EOPNOTSUPP;
1283*4882a593Smuzhiyun 		goto out;
1284*4882a593Smuzhiyun 	}
1285*4882a593Smuzhiyun 
1286*4882a593Smuzhiyun 	ctx = subflow_create_ctx(sk, GFP_KERNEL);
1287*4882a593Smuzhiyun 	if (!ctx) {
1288*4882a593Smuzhiyun 		err = -ENOMEM;
1289*4882a593Smuzhiyun 		goto out;
1290*4882a593Smuzhiyun 	}
1291*4882a593Smuzhiyun 
1292*4882a593Smuzhiyun 	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
1293*4882a593Smuzhiyun 
1294*4882a593Smuzhiyun 	tp->is_mptcp = 1;
1295*4882a593Smuzhiyun 	ctx->icsk_af_ops = icsk->icsk_af_ops;
1296*4882a593Smuzhiyun 	icsk->icsk_af_ops = subflow_default_af_ops(sk);
1297*4882a593Smuzhiyun 	ctx->tcp_data_ready = sk->sk_data_ready;
1298*4882a593Smuzhiyun 	ctx->tcp_state_change = sk->sk_state_change;
1299*4882a593Smuzhiyun 	ctx->tcp_write_space = sk->sk_write_space;
1300*4882a593Smuzhiyun 	sk->sk_data_ready = subflow_data_ready;
1301*4882a593Smuzhiyun 	sk->sk_write_space = subflow_write_space;
1302*4882a593Smuzhiyun 	sk->sk_state_change = subflow_state_change;
1303*4882a593Smuzhiyun out:
1304*4882a593Smuzhiyun 	return err;
1305*4882a593Smuzhiyun }
1306*4882a593Smuzhiyun 
subflow_ulp_release(struct sock * sk)1307*4882a593Smuzhiyun static void subflow_ulp_release(struct sock *sk)
1308*4882a593Smuzhiyun {
1309*4882a593Smuzhiyun 	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
1310*4882a593Smuzhiyun 
1311*4882a593Smuzhiyun 	if (!ctx)
1312*4882a593Smuzhiyun 		return;
1313*4882a593Smuzhiyun 
1314*4882a593Smuzhiyun 	if (ctx->conn)
1315*4882a593Smuzhiyun 		sock_put(ctx->conn);
1316*4882a593Smuzhiyun 
1317*4882a593Smuzhiyun 	kfree_rcu(ctx, rcu);
1318*4882a593Smuzhiyun }
1319*4882a593Smuzhiyun 
subflow_ulp_clone(const struct request_sock * req,struct sock * newsk,const gfp_t priority)1320*4882a593Smuzhiyun static void subflow_ulp_clone(const struct request_sock *req,
1321*4882a593Smuzhiyun 			      struct sock *newsk,
1322*4882a593Smuzhiyun 			      const gfp_t priority)
1323*4882a593Smuzhiyun {
1324*4882a593Smuzhiyun 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
1325*4882a593Smuzhiyun 	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
1326*4882a593Smuzhiyun 	struct mptcp_subflow_context *new_ctx;
1327*4882a593Smuzhiyun 
1328*4882a593Smuzhiyun 	if (!tcp_rsk(req)->is_mptcp ||
1329*4882a593Smuzhiyun 	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
1330*4882a593Smuzhiyun 		subflow_ulp_fallback(newsk, old_ctx);
1331*4882a593Smuzhiyun 		return;
1332*4882a593Smuzhiyun 	}
1333*4882a593Smuzhiyun 
1334*4882a593Smuzhiyun 	new_ctx = subflow_create_ctx(newsk, priority);
1335*4882a593Smuzhiyun 	if (!new_ctx) {
1336*4882a593Smuzhiyun 		subflow_ulp_fallback(newsk, old_ctx);
1337*4882a593Smuzhiyun 		return;
1338*4882a593Smuzhiyun 	}
1339*4882a593Smuzhiyun 
1340*4882a593Smuzhiyun 	new_ctx->conn_finished = 1;
1341*4882a593Smuzhiyun 	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
1342*4882a593Smuzhiyun 	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
1343*4882a593Smuzhiyun 	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
1344*4882a593Smuzhiyun 	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
1345*4882a593Smuzhiyun 	new_ctx->rel_write_seq = 1;
1346*4882a593Smuzhiyun 	new_ctx->tcp_sock = newsk;
1347*4882a593Smuzhiyun 
1348*4882a593Smuzhiyun 	if (subflow_req->mp_capable) {
1349*4882a593Smuzhiyun 		/* see comments in subflow_syn_recv_sock(), MPTCP connection
1350*4882a593Smuzhiyun 		 * is fully established only after we receive the remote key
1351*4882a593Smuzhiyun 		 */
1352*4882a593Smuzhiyun 		new_ctx->mp_capable = 1;
1353*4882a593Smuzhiyun 		new_ctx->local_key = subflow_req->local_key;
1354*4882a593Smuzhiyun 		new_ctx->token = subflow_req->token;
1355*4882a593Smuzhiyun 		new_ctx->ssn_offset = subflow_req->ssn_offset;
1356*4882a593Smuzhiyun 		new_ctx->idsn = subflow_req->idsn;
1357*4882a593Smuzhiyun 	} else if (subflow_req->mp_join) {
1358*4882a593Smuzhiyun 		new_ctx->ssn_offset = subflow_req->ssn_offset;
1359*4882a593Smuzhiyun 		new_ctx->mp_join = 1;
1360*4882a593Smuzhiyun 		new_ctx->fully_established = 1;
1361*4882a593Smuzhiyun 		new_ctx->backup = subflow_req->backup;
1362*4882a593Smuzhiyun 		new_ctx->local_id = subflow_req->local_id;
1363*4882a593Smuzhiyun 		new_ctx->remote_id = subflow_req->remote_id;
1364*4882a593Smuzhiyun 		new_ctx->token = subflow_req->token;
1365*4882a593Smuzhiyun 		new_ctx->thmac = subflow_req->thmac;
1366*4882a593Smuzhiyun 	}
1367*4882a593Smuzhiyun }
1368*4882a593Smuzhiyun 
/* ULP operations registered with TCP under the name "mptcp"; installed
 * on kernel subflow sockets via the TCP_ULP socket option.
 */
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};
1376*4882a593Smuzhiyun 
subflow_ops_init(struct request_sock_ops * subflow_ops)1377*4882a593Smuzhiyun static int subflow_ops_init(struct request_sock_ops *subflow_ops)
1378*4882a593Smuzhiyun {
1379*4882a593Smuzhiyun 	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
1380*4882a593Smuzhiyun 	subflow_ops->slab_name = "request_sock_subflow";
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
1383*4882a593Smuzhiyun 					      subflow_ops->obj_size, 0,
1384*4882a593Smuzhiyun 					      SLAB_ACCOUNT |
1385*4882a593Smuzhiyun 					      SLAB_TYPESAFE_BY_RCU,
1386*4882a593Smuzhiyun 					      NULL);
1387*4882a593Smuzhiyun 	if (!subflow_ops->slab)
1388*4882a593Smuzhiyun 		return -ENOMEM;
1389*4882a593Smuzhiyun 
1390*4882a593Smuzhiyun 	subflow_ops->destructor = subflow_req_destructor;
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun 	return 0;
1393*4882a593Smuzhiyun }
1394*4882a593Smuzhiyun 
/* Boot-time initialization of the MPTCP subflow machinery.
 *
 * Clones the stock TCP request-sock / af-specific ops tables and
 * overrides the handful of hooks MPTCP needs, then registers the
 * subflow ULP.  Registration must come last: once the ULP is visible,
 * subflows can be created and all ops tables must be ready.  Failures
 * here are fatal (panic) since MPTCP cannot operate without them.
 */
void __init mptcp_subflow_init(void)
{
	mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

	/* ops for mapped (IPv4-over-IPv6) subflows: IPv6 socket, but
	 * packets are built and sent through the IPv4 code paths
	 */
	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}
1431