1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /* SCTP kernel implementation
3*4882a593Smuzhiyun * Copyright (c) 1999-2000 Cisco, Inc.
4*4882a593Smuzhiyun * Copyright (c) 1999-2001 Motorola, Inc.
5*4882a593Smuzhiyun * Copyright (c) 2001-2003 International Business Machines Corp.
6*4882a593Smuzhiyun * Copyright (c) 2001 Intel Corp.
7*4882a593Smuzhiyun * Copyright (c) 2001 La Monte H.P. Yarroll
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * This file is part of the SCTP kernel implementation
10*4882a593Smuzhiyun *
 * This module provides the abstraction for an SCTP transport representing
12*4882a593Smuzhiyun * a remote transport address. For local transport addresses, we just use
13*4882a593Smuzhiyun * union sctp_addr.
14*4882a593Smuzhiyun *
15*4882a593Smuzhiyun * Please send any bug reports or fixes you make to the
16*4882a593Smuzhiyun * email address(es):
17*4882a593Smuzhiyun * lksctp developers <linux-sctp@vger.kernel.org>
18*4882a593Smuzhiyun *
19*4882a593Smuzhiyun * Written or modified by:
20*4882a593Smuzhiyun * La Monte H.P. Yarroll <piggy@acm.org>
21*4882a593Smuzhiyun * Karl Knutson <karl@athena.chicago.il.us>
22*4882a593Smuzhiyun * Jon Grimm <jgrimm@us.ibm.com>
23*4882a593Smuzhiyun * Xingang Guo <xingang.guo@intel.com>
24*4882a593Smuzhiyun * Hui Huang <hui.huang@nokia.com>
25*4882a593Smuzhiyun * Sridhar Samudrala <sri@us.ibm.com>
26*4882a593Smuzhiyun * Ardelle Fan <ardelle.fan@intel.com>
27*4882a593Smuzhiyun */
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun #include <linux/slab.h>
32*4882a593Smuzhiyun #include <linux/types.h>
33*4882a593Smuzhiyun #include <linux/random.h>
34*4882a593Smuzhiyun #include <net/sctp/sctp.h>
35*4882a593Smuzhiyun #include <net/sctp/sm.h>
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun /* 1st Level Abstractions. */
38*4882a593Smuzhiyun
/* Initialize a new transport from provided (already allocated) memory.
 * Returns @peer with exactly one reference held; @gfp is currently
 * unused in this function but kept for interface symmetry with callers.
 */
static struct sctp_transport *sctp_transport_init(struct net *net,
						  struct sctp_transport *peer,
						  const union sctp_addr *addr,
						  gfp_t gfp)
{
	/* Copy in the address. */
	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
	memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len);
	memset(&peer->saddr, 0, sizeof(union sctp_addr));

	peer->sack_generation = 0;

	/* From 6.3.1 RTO Calculation:
	 *
	 * C1) Until an RTT measurement has been made for a packet sent to the
	 * given destination transport address, set RTO to the protocol
	 * parameter 'RTO.Initial'.
	 */
	peer->rto = msecs_to_jiffies(net->sctp.rto_initial);

	/* 0 means "never heard from" until a reply confirms the peer. */
	peer->last_time_heard = 0;
	peer->last_time_ecne_reduced = jiffies;

	peer->param_flags = SPP_HB_DISABLE |
			    SPP_PMTUD_ENABLE |
			    SPP_SACKDELAY_ENABLE;

	/* Initialize the default path max_retrans. */
	peer->pathmaxrxt  = net->sctp.max_retrans_path;
	peer->pf_retrans  = net->sctp.pf_retrans;

	INIT_LIST_HEAD(&peer->transmitted);
	INIT_LIST_HEAD(&peer->send_ready);
	INIT_LIST_HEAD(&peer->transports);

	/* Set up (but do not arm) the per-transport timers.  When armed,
	 * each pending timer holds a transport reference (see the
	 * sctp_transport_reset_*_timer() helpers and sctp_transport_free()).
	 */
	timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
	timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
	timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
	timer_setup(&peer->proto_unreach_timer,
		    sctp_generate_proto_unreach_event, 0);

	/* Initialize the 64-bit random nonce sent with heartbeat. */
	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));

	/* The caller owns this initial reference. */
	refcount_set(&peer->refcnt, 1);

	return peer;
}
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun /* Allocate and initialize a new transport. */
sctp_transport_new(struct net * net,const union sctp_addr * addr,gfp_t gfp)90*4882a593Smuzhiyun struct sctp_transport *sctp_transport_new(struct net *net,
91*4882a593Smuzhiyun const union sctp_addr *addr,
92*4882a593Smuzhiyun gfp_t gfp)
93*4882a593Smuzhiyun {
94*4882a593Smuzhiyun struct sctp_transport *transport;
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun transport = kzalloc(sizeof(*transport), gfp);
97*4882a593Smuzhiyun if (!transport)
98*4882a593Smuzhiyun goto fail;
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun if (!sctp_transport_init(net, transport, addr, gfp))
101*4882a593Smuzhiyun goto fail_init;
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun SCTP_DBG_OBJCNT_INC(transport);
104*4882a593Smuzhiyun
105*4882a593Smuzhiyun return transport;
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun fail_init:
108*4882a593Smuzhiyun kfree(transport);
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun fail:
111*4882a593Smuzhiyun return NULL;
112*4882a593Smuzhiyun }
113*4882a593Smuzhiyun
/* This transport is no longer needed.  Free up if possible, or
 * delay until the last reference is dropped.
 */
void sctp_transport_free(struct sctp_transport *transport)
{
	/* Try to delete the heartbeat timer.  del_timer() returning
	 * non-zero means the timer was pending and therefore still held
	 * a transport reference, which we release here.
	 */
	if (del_timer(&transport->hb_timer))
		sctp_transport_put(transport);

	/* Delete the T3_rtx timer if it's active.
	 * There is no point in not doing this now and letting
	 * structure hang around in memory since we know
	 * the transport is going away.
	 */
	if (del_timer(&transport->T3_rtx_timer))
		sctp_transport_put(transport);

	/* Same for the stream-reconfiguration timer. */
	if (del_timer(&transport->reconf_timer))
		sctp_transport_put(transport);

	/* Delete the ICMP proto unreachable timer if it's active. */
	if (del_timer(&transport->proto_unreach_timer))
		sctp_transport_put(transport);

	/* Drop the caller's reference; actual destruction happens in
	 * sctp_transport_put() once the refcount reaches zero.
	 */
	sctp_transport_put(transport);
}
140*4882a593Smuzhiyun
sctp_transport_destroy_rcu(struct rcu_head * head)141*4882a593Smuzhiyun static void sctp_transport_destroy_rcu(struct rcu_head *head)
142*4882a593Smuzhiyun {
143*4882a593Smuzhiyun struct sctp_transport *transport;
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun transport = container_of(head, struct sctp_transport, rcu);
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun dst_release(transport->dst);
148*4882a593Smuzhiyun kfree(transport);
149*4882a593Smuzhiyun SCTP_DBG_OBJCNT_DEC(transport);
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun
/* Destroy the transport data structure.
 * Assumes there are no more users of this structure; called only from
 * sctp_transport_put() when the refcount drops to zero.
 */
static void sctp_transport_destroy(struct sctp_transport *transport)
{
	/* A non-zero refcount here means somebody tried to destroy a
	 * transport that is still referenced - warn and bail out.
	 */
	if (unlikely(refcount_read(&transport->refcnt))) {
		WARN(1, "Attempt to destroy undead transport %p!\n", transport);
		return;
	}

	/* Release the transport's cached outbound packet. */
	sctp_packet_free(&transport->packet);

	/* Drop the association reference taken in
	 * sctp_transport_set_owner().
	 */
	if (transport->asoc)
		sctp_association_put(transport->asoc);

	/* Defer freeing until after an RCU grace period so concurrent
	 * RCU readers can finish with this transport.
	 */
	call_rcu(&transport->rcu, sctp_transport_destroy_rcu);
}
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun /* Start T3_rtx timer if it is not already running and update the heartbeat
171*4882a593Smuzhiyun * timer. This routine is called every time a DATA chunk is sent.
172*4882a593Smuzhiyun */
sctp_transport_reset_t3_rtx(struct sctp_transport * transport)173*4882a593Smuzhiyun void sctp_transport_reset_t3_rtx(struct sctp_transport *transport)
174*4882a593Smuzhiyun {
175*4882a593Smuzhiyun /* RFC 2960 6.3.2 Retransmission Timer Rules
176*4882a593Smuzhiyun *
177*4882a593Smuzhiyun * R1) Every time a DATA chunk is sent to any address(including a
178*4882a593Smuzhiyun * retransmission), if the T3-rtx timer of that address is not running
179*4882a593Smuzhiyun * start it running so that it will expire after the RTO of that
180*4882a593Smuzhiyun * address.
181*4882a593Smuzhiyun */
182*4882a593Smuzhiyun
183*4882a593Smuzhiyun if (!timer_pending(&transport->T3_rtx_timer))
184*4882a593Smuzhiyun if (!mod_timer(&transport->T3_rtx_timer,
185*4882a593Smuzhiyun jiffies + transport->rto))
186*4882a593Smuzhiyun sctp_transport_hold(transport);
187*4882a593Smuzhiyun }
188*4882a593Smuzhiyun
/* Push the heartbeat deadline out after activity on the transport.
 * Re-arms the heartbeat timer only when it is inactive or would fire
 * earlier than the new deadline; a random jitter of up to one RTO is
 * added on top.  mod_timer() returning 0 means the timer was inactive,
 * so a reference is taken for the now-pending timer.
 */
void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
{
	unsigned long expires;

	/* When a data chunk is sent, reset the heartbeat interval. */
	expires = jiffies + sctp_transport_timeout(transport);
	if ((time_before(transport->hb_timer.expires, expires) ||
	     !timer_pending(&transport->hb_timer)) &&
	    !mod_timer(&transport->hb_timer,
		       expires + prandom_u32_max(transport->rto)))
		sctp_transport_hold(transport);
}
201*4882a593Smuzhiyun
sctp_transport_reset_reconf_timer(struct sctp_transport * transport)202*4882a593Smuzhiyun void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
203*4882a593Smuzhiyun {
204*4882a593Smuzhiyun if (!timer_pending(&transport->reconf_timer))
205*4882a593Smuzhiyun if (!mod_timer(&transport->reconf_timer,
206*4882a593Smuzhiyun jiffies + transport->rto))
207*4882a593Smuzhiyun sctp_transport_hold(transport);
208*4882a593Smuzhiyun }
209*4882a593Smuzhiyun
210*4882a593Smuzhiyun /* This transport has been assigned to an association.
211*4882a593Smuzhiyun * Initialize fields from the association or from the sock itself.
212*4882a593Smuzhiyun * Register the reference count in the association.
213*4882a593Smuzhiyun */
sctp_transport_set_owner(struct sctp_transport * transport,struct sctp_association * asoc)214*4882a593Smuzhiyun void sctp_transport_set_owner(struct sctp_transport *transport,
215*4882a593Smuzhiyun struct sctp_association *asoc)
216*4882a593Smuzhiyun {
217*4882a593Smuzhiyun transport->asoc = asoc;
218*4882a593Smuzhiyun sctp_association_hold(asoc);
219*4882a593Smuzhiyun }
220*4882a593Smuzhiyun
/* Initialize the pmtu of a transport, from its route or - when PMTU
 * discovery is disabled - from an already configured value.
 */
void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
{
	/* If we don't have a fresh route, look one up */
	if (!transport->dst || transport->dst->obsolete) {
		sctp_transport_dst_release(transport);
		transport->af_specific->get_dst(transport, &transport->saddr,
						&transport->fl, sk);
	}

	/* With PMTU discovery disabled, prefer configured values: take
	 * the association's pathmtu when the transport has none yet, and
	 * keep any non-zero value we end up with.
	 */
	if (transport->param_flags & SPP_PMTUD_DISABLE) {
		struct sctp_association *asoc = transport->asoc;

		if (!transport->pathmtu && asoc && asoc->pathmtu)
			transport->pathmtu = asoc->pathmtu;
		if (transport->pathmtu)
			return;
	}

	/* Otherwise take the MTU from the route, falling back to the
	 * protocol default when no route is available.
	 */
	if (transport->dst)
		transport->pathmtu = sctp_dst_mtu(transport->dst);
	else
		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
245*4882a593Smuzhiyun
/* Update the transport's cached path MTU after a PMTU event.
 * Returns true when t->pathmtu actually changed (also true when no
 * route could be obtained to confirm the reported value).
 */
bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
	struct dst_entry *dst = sctp_transport_dst_check(t);
	struct sock *sk = t->asoc->base.sk;
	bool change = true;

	/* Clamp bogus (too small) reports to the protocol minimum. */
	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
		pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
				    __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
		/* Use default minimum segment instead */
		pmtu = SCTP_DEFAULT_MINSEGMENT;
	}
	pmtu = SCTP_TRUNC4(pmtu);

	if (dst) {
		struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family);
		union sctp_addr addr;

		/* update_pmtu() operates on the socket's destination
		 * address, so temporarily point sk's daddr at this
		 * transport's peer address and restore the saved address
		 * afterwards.
		 */
		pf->af->from_sk(&addr, sk);	/* save current daddr */
		pf->to_sk_daddr(&t->ipaddr, sk);
		dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
		pf->to_sk_daddr(&addr, sk);	/* restore */

		/* The update may have invalidated the cached route. */
		dst = sctp_transport_dst_check(t);
	}

	if (!dst) {
		/* No usable route - look a fresh one up. */
		t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
		dst = t->dst;
	}

	if (dst) {
		/* Re-fetch, as under layers may have a higher minimum size */
		pmtu = sctp_dst_mtu(dst);
		change = t->pathmtu != pmtu;
	}
	t->pathmtu = pmtu;

	return change;
}
286*4882a593Smuzhiyun
/* Caches the dst entry and source address for a transport's destination
 * address.  When @saddr is NULL the source address is derived from the
 * freshly looked-up route.
 */
void sctp_transport_route(struct sctp_transport *transport,
			  union sctp_addr *saddr, struct sctp_sock *opt)
{
	struct sctp_association *asoc = transport->asoc;
	struct sctp_af *af = transport->af_specific;

	/* Drop any stale cached route before looking up a new one. */
	sctp_transport_dst_release(transport);
	af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));

	if (saddr)
		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
	else
		af->get_saddr(opt, transport, &transport->fl);

	/* Refresh the path MTU from the new route. */
	sctp_transport_pmtu(transport, sctp_opt2sk(opt));

	/* Initialize sk->sk_rcv_saddr, if the transport is the
	 * association's active path for getsockname().
	 */
	if (transport->dst && asoc &&
	    (!asoc->peer.primary_path || transport == asoc->peer.active_path))
		opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk);
}
313*4882a593Smuzhiyun
/* Hold a reference to a transport.  Returns non-zero on success and 0
 * when the refcount had already dropped to zero (transport is being
 * destroyed and must not be used).
 */
int sctp_transport_hold(struct sctp_transport *transport)
{
	return refcount_inc_not_zero(&transport->refcnt);
}
319*4882a593Smuzhiyun
/* Release a reference to a transport and clean up
 * if there are no more references.
 */
void sctp_transport_put(struct sctp_transport *transport)
{
	/* The last put triggers destruction (RCU-deferred free). */
	if (refcount_dec_and_test(&transport->refcnt))
		sctp_transport_destroy(transport);
}
328*4882a593Smuzhiyun
/* Update transport's RTO based on the newly calculated RTT, per the
 * SRTT/RTTVAR smoothing rules of RFC 4960 section 6.3.1 (C2, C3, C6,
 * C7), then clear rto_pending so a new measurement can start.
 * @rtt is the measured round-trip time in jiffies.
 */
void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
{
	if (unlikely(!tp->rto_pending))
		/* We should not be doing any RTO updates unless rto_pending is set. */
		pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp);

	if (tp->rttvar || tp->srtt) {
		struct net *net = tp->asoc->base.net;
		/* 6.3.1 C3) When a new RTT measurement R' is made, set
		 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
		 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
		 */

		/* Note: The above algorithm has been rewritten to
		 * express rto_beta and rto_alpha as inverse powers
		 * of two.
		 * For example, assuming the default value of RTO.Alpha of
		 * 1/8, rto_alpha would be expressed as 3.
		 */
		tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
			+ (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
		tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
			+ (rtt >> net->sctp.rto_alpha);
	} else {
		/* 6.3.1 C2) When the first RTT measurement R is made, set
		 * SRTT <- R, RTTVAR <- R/2.
		 */
		tp->srtt = rtt;
		tp->rttvar = rtt >> 1;
	}

	/* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
	 * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
	 */
	if (tp->rttvar == 0)
		tp->rttvar = SCTP_CLOCK_GRANULARITY;

	/* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
	tp->rto = tp->srtt + (tp->rttvar << 2);

	/* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
	 * seconds then it is rounded up to RTO.Min seconds.
	 */
	if (tp->rto < tp->asoc->rto_min)
		tp->rto = tp->asoc->rto_min;

	/* 6.3.1 C7) A maximum value may be placed on RTO provided it is
	 * at least RTO.max seconds.
	 */
	if (tp->rto > tp->asoc->rto_max)
		tp->rto = tp->asoc->rto_max;

	sctp_max_rto(tp->asoc, tp);
	tp->rtt = rtt;

	/* Reset rto_pending so that a new RTT measurement is started when a
	 * new data chunk is sent.
	 */
	tp->rto_pending = 0;

	pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n",
		 __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto);
}
393*4882a593Smuzhiyun
/* This routine updates the transport's cwnd and partial_bytes_acked
 * parameters based on the bytes acked in the received SACK.
 *
 * @sack_ctsn:   the Cumulative TSN Ack from the SACK.
 * @bytes_acked: total bytes of newly acknowledged chunks credited to
 *               this transport.
 */
void sctp_transport_raise_cwnd(struct sctp_transport *transport,
			       __u32 sack_ctsn, __u32 bytes_acked)
{
	struct sctp_association *asoc = transport->asoc;
	__u32 cwnd, ssthresh, flight_size, pba, pmtu;

	cwnd = transport->cwnd;
	flight_size = transport->flight_size;

	/* See if we need to exit Fast Recovery first */
	if (asoc->fast_recovery &&
	    TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
		asoc->fast_recovery = 0;

	ssthresh = transport->ssthresh;
	pba = transport->partial_bytes_acked;
	pmtu = transport->asoc->pathmtu;

	/* Slow start while cwnd <= ssthresh; congestion avoidance above. */
	if (cwnd <= ssthresh) {
		/* RFC 4960 7.2.1
		 * o When cwnd is less than or equal to ssthresh, an SCTP
		 * endpoint MUST use the slow-start algorithm to increase
		 * cwnd only if the current congestion window is being fully
		 * utilized, an incoming SACK advances the Cumulative TSN
		 * Ack Point, and the data sender is not in Fast Recovery.
		 * Only when these three conditions are met can the cwnd be
		 * increased; otherwise, the cwnd MUST not be increased.
		 * If these conditions are met, then cwnd MUST be increased
		 * by, at most, the lesser of 1) the total size of the
		 * previously outstanding DATA chunk(s) acknowledged, and
		 * 2) the destination's path MTU. This upper bound protects
		 * against the ACK-Splitting attack outlined in [SAVAGE99].
		 */
		if (asoc->fast_recovery)
			return;

		/* The appropriate cwnd increase algorithm is performed
		 * if, and only if the congestion window is being fully
		 * utilized. Note that RFC4960 Errata 3.22 removed the
		 * other condition on ctsn moving.
		 */
		if (flight_size < cwnd)
			return;

		/* Grow by at most one PMTU per SACK. */
		if (bytes_acked > pmtu)
			cwnd += pmtu;
		else
			cwnd += bytes_acked;

		pr_debug("%s: slow start: transport:%p, bytes_acked:%d, "
			 "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n",
			 __func__, transport, bytes_acked, cwnd, ssthresh,
			 flight_size, pba);
	} else {
		/* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
		 * upon each SACK arrival, increase partial_bytes_acked
		 * by the total number of bytes of all new chunks
		 * acknowledged in that SACK including chunks
		 * acknowledged by the new Cumulative TSN Ack and by Gap
		 * Ack Blocks. (updated by RFC4960 Errata 3.22)
		 *
		 * When partial_bytes_acked is greater than cwnd and
		 * before the arrival of the SACK the sender had less
		 * bytes of data outstanding than cwnd (i.e., before
		 * arrival of the SACK, flightsize was less than cwnd),
		 * reset partial_bytes_acked to cwnd. (RFC 4960 Errata
		 * 3.26)
		 *
		 * When partial_bytes_acked is equal to or greater than
		 * cwnd and before the arrival of the SACK the sender
		 * had cwnd or more bytes of data outstanding (i.e.,
		 * before arrival of the SACK, flightsize was greater
		 * than or equal to cwnd), partial_bytes_acked is reset
		 * to (partial_bytes_acked - cwnd). Next, cwnd is
		 * increased by MTU. (RFC 4960 Errata 3.12)
		 */
		pba += bytes_acked;
		if (pba > cwnd && flight_size < cwnd)
			pba = cwnd;
		if (pba >= cwnd && flight_size >= cwnd) {
			pba = pba - cwnd;
			cwnd += pmtu;
		}

		pr_debug("%s: congestion avoidance: transport:%p, "
			 "bytes_acked:%d, cwnd:%d, ssthresh:%d, "
			 "flight_size:%d, pba:%d\n", __func__,
			 transport, bytes_acked, cwnd, ssthresh,
			 flight_size, pba);
	}

	/* Commit the updated window state back to the transport. */
	transport->cwnd = cwnd;
	transport->partial_bytes_acked = pba;
}
491*4882a593Smuzhiyun
/* This routine is used to lower the transport's cwnd when congestion is
 * detected.  @reason selects which RFC rule applies (T3-rtx expiry,
 * fast retransmit, ECN echo, or path inactivity).
 */
void sctp_transport_lower_cwnd(struct sctp_transport *transport,
			       enum sctp_lower_cwnd reason)
{
	struct sctp_association *asoc = transport->asoc;

	switch (reason) {
	case SCTP_LOWER_CWND_T3_RTX:
		/* RFC 2960 Section 7.2.3, sctpimpguide
		 * When the T3-rtx timer expires on an address, SCTP should
		 * perform slow start by:
		 *      ssthresh = max(cwnd/2, 4*MTU)
		 *      cwnd = 1*MTU
		 *      partial_bytes_acked = 0
		 */
		transport->ssthresh = max(transport->cwnd/2,
					  4*asoc->pathmtu);
		transport->cwnd = asoc->pathmtu;

		/* T3-rtx also clears fast recovery */
		asoc->fast_recovery = 0;
		break;

	case SCTP_LOWER_CWND_FAST_RTX:
		/* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
		 * destination address(es) to which the missing DATA chunks
		 * were last sent, according to the formula described in
		 * Section 7.2.3.
		 *
		 * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
		 * losses from SACK (see Section 7.2.4), An endpoint
		 * should do the following:
		 *      ssthresh = max(cwnd/2, 4*MTU)
		 *      cwnd = ssthresh
		 *      partial_bytes_acked = 0
		 */
		if (asoc->fast_recovery)
			return;

		/* Mark Fast recovery */
		asoc->fast_recovery = 1;
		asoc->fast_recovery_exit = asoc->next_tsn - 1;

		transport->ssthresh = max(transport->cwnd/2,
					  4*asoc->pathmtu);
		transport->cwnd = transport->ssthresh;
		break;

	case SCTP_LOWER_CWND_ECNE:
		/* RFC 2481 Section 6.1.2.
		 * If the sender receives an ECN-Echo ACK packet
		 * then the sender knows that congestion was encountered in the
		 * network on the path from the sender to the receiver. The
		 * indication of congestion should be treated just as a
		 * congestion loss in non-ECN Capable TCP. That is, the TCP
		 * source halves the congestion window "cwnd" and reduces the
		 * slow start threshold "ssthresh".
		 * A critical condition is that TCP does not react to
		 * congestion indications more than once every window of
		 * data (or more loosely more than once every round-trip time).
		 */
		if (time_after(jiffies, transport->last_time_ecne_reduced +
					transport->rtt)) {
			transport->ssthresh = max(transport->cwnd/2,
						  4*asoc->pathmtu);
			transport->cwnd = transport->ssthresh;
			transport->last_time_ecne_reduced = jiffies;
		}
		break;

	case SCTP_LOWER_CWND_INACTIVE:
		/* RFC 2960 Section 7.2.1, sctpimpguide
		 * When the endpoint does not transmit data on a given
		 * transport address, the cwnd of the transport address
		 * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
		 * NOTE: Although the draft recommends that this check needs
		 * to be done every RTO interval, we do it every heartbeat
		 * interval.
		 */
		transport->cwnd = max(transport->cwnd/2,
					 4*asoc->pathmtu);
		/* RFC 4960 Errata 3.27.2: also adjust sshthresh */
		transport->ssthresh = transport->cwnd;
		break;
	}

	/* All reductions reset partial_bytes_acked. */
	transport->partial_bytes_acked = 0;

	pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n",
		 __func__, transport, reason, transport->cwnd,
		 transport->ssthresh);
}
586*4882a593Smuzhiyun
587*4882a593Smuzhiyun /* Apply Max.Burst limit to the congestion window:
588*4882a593Smuzhiyun * sctpimpguide-05 2.14.2
589*4882a593Smuzhiyun * D) When the time comes for the sender to
590*4882a593Smuzhiyun * transmit new DATA chunks, the protocol parameter Max.Burst MUST
591*4882a593Smuzhiyun * first be applied to limit how many new DATA chunks may be sent.
592*4882a593Smuzhiyun * The limit is applied by adjusting cwnd as follows:
593*4882a593Smuzhiyun * if ((flightsize+ Max.Burst * MTU) < cwnd)
594*4882a593Smuzhiyun * cwnd = flightsize + Max.Burst * MTU
595*4882a593Smuzhiyun */
596*4882a593Smuzhiyun
sctp_transport_burst_limited(struct sctp_transport * t)597*4882a593Smuzhiyun void sctp_transport_burst_limited(struct sctp_transport *t)
598*4882a593Smuzhiyun {
599*4882a593Smuzhiyun struct sctp_association *asoc = t->asoc;
600*4882a593Smuzhiyun u32 old_cwnd = t->cwnd;
601*4882a593Smuzhiyun u32 max_burst_bytes;
602*4882a593Smuzhiyun
603*4882a593Smuzhiyun if (t->burst_limited || asoc->max_burst == 0)
604*4882a593Smuzhiyun return;
605*4882a593Smuzhiyun
606*4882a593Smuzhiyun max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu);
607*4882a593Smuzhiyun if (max_burst_bytes < old_cwnd) {
608*4882a593Smuzhiyun t->cwnd = max_burst_bytes;
609*4882a593Smuzhiyun t->burst_limited = old_cwnd;
610*4882a593Smuzhiyun }
611*4882a593Smuzhiyun }
612*4882a593Smuzhiyun
613*4882a593Smuzhiyun /* Restore the old cwnd congestion window, after the burst had it's
614*4882a593Smuzhiyun * desired effect.
615*4882a593Smuzhiyun */
sctp_transport_burst_reset(struct sctp_transport * t)616*4882a593Smuzhiyun void sctp_transport_burst_reset(struct sctp_transport *t)
617*4882a593Smuzhiyun {
618*4882a593Smuzhiyun if (t->burst_limited) {
619*4882a593Smuzhiyun t->cwnd = t->burst_limited;
620*4882a593Smuzhiyun t->burst_limited = 0;
621*4882a593Smuzhiyun }
622*4882a593Smuzhiyun }
623*4882a593Smuzhiyun
624*4882a593Smuzhiyun /* What is the next timeout value for this transport? */
sctp_transport_timeout(struct sctp_transport * trans)625*4882a593Smuzhiyun unsigned long sctp_transport_timeout(struct sctp_transport *trans)
626*4882a593Smuzhiyun {
627*4882a593Smuzhiyun /* RTO + timer slack +/- 50% of RTO */
628*4882a593Smuzhiyun unsigned long timeout = trans->rto >> 1;
629*4882a593Smuzhiyun
630*4882a593Smuzhiyun if (trans->state != SCTP_UNCONFIRMED &&
631*4882a593Smuzhiyun trans->state != SCTP_PF)
632*4882a593Smuzhiyun timeout += trans->hbinterval;
633*4882a593Smuzhiyun
634*4882a593Smuzhiyun return max_t(unsigned long, timeout, HZ / 5);
635*4882a593Smuzhiyun }
636*4882a593Smuzhiyun
637*4882a593Smuzhiyun /* Reset transport variables to their initial values */
/* Reset transport variables to their initial values */
void sctp_transport_reset(struct sctp_transport *t)
{
	struct sctp_association *asoc = t->asoc;

	/* RFC 2960 (bis), Section 5.2.4
	 * All the congestion control parameters (e.g., cwnd, ssthresh)
	 * related to this peer MUST be reset to their initial values
	 * (see Section 6.2.1)
	 */
	/* Initial cwnd per RFC 4960 7.2.1: min(4*MTU, max(2*MTU, 4380)). */
	t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));
	t->burst_limited = 0;
	/* Initial ssthresh is the peer's advertised receive window. */
	t->ssthresh = asoc->peer.i.a_rwnd;
	t->rto = asoc->rto_initial;
	/* NOTE(review): presumably clamps t->rto against the association's
	 * configured maximum, so it must run after rto is set — confirm
	 * against sctp_max_rto()'s definition.
	 */
	sctp_max_rto(asoc, t);
	/* Discard all RTT measurement state; a fresh measurement starts
	 * with the next rto_pending-tagged chunk.
	 */
	t->rtt = 0;
	t->srtt = 0;
	t->rttvar = 0;

	/* Reset these additional variables so that we have a clean slate. */
	t->partial_bytes_acked = 0;
	t->flight_size = 0;
	t->error_count = 0;
	t->rto_pending = 0;
	t->hb_sent = 0;

	/* Initialize the state information for SFR-CACC */
	t->cacc.changeover_active = 0;
	t->cacc.cycling_changeover = 0;
	t->cacc.next_tsn_at_change = 0;
	t->cacc.cacc_saw_newack = 0;
}
669*4882a593Smuzhiyun
670*4882a593Smuzhiyun /* Schedule retransmission on the given transport */
sctp_transport_immediate_rtx(struct sctp_transport * t)671*4882a593Smuzhiyun void sctp_transport_immediate_rtx(struct sctp_transport *t)
672*4882a593Smuzhiyun {
673*4882a593Smuzhiyun /* Stop pending T3_rtx_timer */
674*4882a593Smuzhiyun if (del_timer(&t->T3_rtx_timer))
675*4882a593Smuzhiyun sctp_transport_put(t);
676*4882a593Smuzhiyun
677*4882a593Smuzhiyun sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX);
678*4882a593Smuzhiyun if (!timer_pending(&t->T3_rtx_timer)) {
679*4882a593Smuzhiyun if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
680*4882a593Smuzhiyun sctp_transport_hold(t);
681*4882a593Smuzhiyun }
682*4882a593Smuzhiyun }
683*4882a593Smuzhiyun
684*4882a593Smuzhiyun /* Drop dst */
sctp_transport_dst_release(struct sctp_transport * t)685*4882a593Smuzhiyun void sctp_transport_dst_release(struct sctp_transport *t)
686*4882a593Smuzhiyun {
687*4882a593Smuzhiyun dst_release(t->dst);
688*4882a593Smuzhiyun t->dst = NULL;
689*4882a593Smuzhiyun t->dst_pending_confirm = 0;
690*4882a593Smuzhiyun }
691*4882a593Smuzhiyun
692*4882a593Smuzhiyun /* Schedule neighbour confirm */
/* Flag the cached route as needing neighbour confirmation.  The flag
 * is cleared when the route is dropped in sctp_transport_dst_release();
 * presumably it is consumed on the transmit path — confirm at callers.
 */
void sctp_transport_dst_confirm(struct sctp_transport *t)
{
	t->dst_pending_confirm = 1;
}
697