1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * TCP Illinois congestion control.
4*4882a593Smuzhiyun * Home page:
5*4882a593Smuzhiyun * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * The algorithm is described in:
8*4882a593Smuzhiyun * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
9*4882a593Smuzhiyun * for High-Speed Networks"
10*4882a593Smuzhiyun * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
11*4882a593Smuzhiyun *
12*4882a593Smuzhiyun * Implemented from description in paper and ns-2 simulation.
13*4882a593Smuzhiyun * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
14*4882a593Smuzhiyun */
15*4882a593Smuzhiyun
16*4882a593Smuzhiyun #include <linux/module.h>
17*4882a593Smuzhiyun #include <linux/skbuff.h>
18*4882a593Smuzhiyun #include <linux/inet_diag.h>
19*4882a593Smuzhiyun #include <asm/div64.h>
20*4882a593Smuzhiyun #include <net/tcp.h>
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun #define ALPHA_SHIFT 7
23*4882a593Smuzhiyun #define ALPHA_SCALE (1u<<ALPHA_SHIFT)
24*4882a593Smuzhiyun #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */
25*4882a593Smuzhiyun #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */
26*4882a593Smuzhiyun #define ALPHA_BASE ALPHA_SCALE /* 1.0 */
27*4882a593Smuzhiyun #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun #define BETA_SHIFT 6
30*4882a593Smuzhiyun #define BETA_SCALE (1u<<BETA_SHIFT)
31*4882a593Smuzhiyun #define BETA_MIN (BETA_SCALE/8) /* 0.125 */
32*4882a593Smuzhiyun #define BETA_MAX (BETA_SCALE/2) /* 0.5 */
33*4882a593Smuzhiyun #define BETA_BASE BETA_MAX
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun static int win_thresh __read_mostly = 15;
36*4882a593Smuzhiyun module_param(win_thresh, int, 0);
37*4882a593Smuzhiyun MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun static int theta __read_mostly = 5;
40*4882a593Smuzhiyun module_param(theta, int, 0);
41*4882a593Smuzhiyun MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun /* TCP Illinois Parameters */
44*4882a593Smuzhiyun struct illinois {
45*4882a593Smuzhiyun u64 sum_rtt; /* sum of rtt's measured within last rtt */
46*4882a593Smuzhiyun u16 cnt_rtt; /* # of rtts measured within last rtt */
47*4882a593Smuzhiyun u32 base_rtt; /* min of all rtt in usec */
48*4882a593Smuzhiyun u32 max_rtt; /* max of all rtt in usec */
49*4882a593Smuzhiyun u32 end_seq; /* right edge of current RTT */
50*4882a593Smuzhiyun u32 alpha; /* Additive increase */
51*4882a593Smuzhiyun u32 beta; /* Muliplicative decrease */
52*4882a593Smuzhiyun u16 acked; /* # packets acked by current ACK */
53*4882a593Smuzhiyun u8 rtt_above; /* average rtt has gone above threshold */
54*4882a593Smuzhiyun u8 rtt_low; /* # of rtts measurements below threshold */
55*4882a593Smuzhiyun };
56*4882a593Smuzhiyun
rtt_reset(struct sock * sk)57*4882a593Smuzhiyun static void rtt_reset(struct sock *sk)
58*4882a593Smuzhiyun {
59*4882a593Smuzhiyun struct tcp_sock *tp = tcp_sk(sk);
60*4882a593Smuzhiyun struct illinois *ca = inet_csk_ca(sk);
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun ca->end_seq = tp->snd_nxt;
63*4882a593Smuzhiyun ca->cnt_rtt = 0;
64*4882a593Smuzhiyun ca->sum_rtt = 0;
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun /* TODO: age max_rtt? */
67*4882a593Smuzhiyun }
68*4882a593Smuzhiyun
tcp_illinois_init(struct sock * sk)69*4882a593Smuzhiyun static void tcp_illinois_init(struct sock *sk)
70*4882a593Smuzhiyun {
71*4882a593Smuzhiyun struct illinois *ca = inet_csk_ca(sk);
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun ca->alpha = ALPHA_MAX;
74*4882a593Smuzhiyun ca->beta = BETA_BASE;
75*4882a593Smuzhiyun ca->base_rtt = 0x7fffffff;
76*4882a593Smuzhiyun ca->max_rtt = 0;
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun ca->acked = 0;
79*4882a593Smuzhiyun ca->rtt_low = 0;
80*4882a593Smuzhiyun ca->rtt_above = 0;
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun rtt_reset(sk);
83*4882a593Smuzhiyun }
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun /* Measure RTT for each ack. */
tcp_illinois_acked(struct sock * sk,const struct ack_sample * sample)86*4882a593Smuzhiyun static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample)
87*4882a593Smuzhiyun {
88*4882a593Smuzhiyun struct illinois *ca = inet_csk_ca(sk);
89*4882a593Smuzhiyun s32 rtt_us = sample->rtt_us;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun ca->acked = sample->pkts_acked;
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun /* dup ack, no rtt sample */
94*4882a593Smuzhiyun if (rtt_us < 0)
95*4882a593Smuzhiyun return;
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun /* ignore bogus values, this prevents wraparound in alpha math */
98*4882a593Smuzhiyun if (rtt_us > RTT_MAX)
99*4882a593Smuzhiyun rtt_us = RTT_MAX;
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun /* keep track of minimum RTT seen so far */
102*4882a593Smuzhiyun if (ca->base_rtt > rtt_us)
103*4882a593Smuzhiyun ca->base_rtt = rtt_us;
104*4882a593Smuzhiyun
105*4882a593Smuzhiyun /* and max */
106*4882a593Smuzhiyun if (ca->max_rtt < rtt_us)
107*4882a593Smuzhiyun ca->max_rtt = rtt_us;
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun ++ca->cnt_rtt;
110*4882a593Smuzhiyun ca->sum_rtt += rtt_us;
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun /* Maximum queuing delay */
max_delay(const struct illinois * ca)114*4882a593Smuzhiyun static inline u32 max_delay(const struct illinois *ca)
115*4882a593Smuzhiyun {
116*4882a593Smuzhiyun return ca->max_rtt - ca->base_rtt;
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun /* Average queuing delay */
avg_delay(const struct illinois * ca)120*4882a593Smuzhiyun static inline u32 avg_delay(const struct illinois *ca)
121*4882a593Smuzhiyun {
122*4882a593Smuzhiyun u64 t = ca->sum_rtt;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun do_div(t, ca->cnt_rtt);
125*4882a593Smuzhiyun return t - ca->base_rtt;
126*4882a593Smuzhiyun }
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun /*
129*4882a593Smuzhiyun * Compute value of alpha used for additive increase.
130*4882a593Smuzhiyun * If small window then use 1.0, equivalent to Reno.
131*4882a593Smuzhiyun *
132*4882a593Smuzhiyun * For larger windows, adjust based on average delay.
133*4882a593Smuzhiyun * A. If average delay is at minimum (we are uncongested),
134*4882a593Smuzhiyun * then use large alpha (10.0) to increase faster.
135*4882a593Smuzhiyun * B. If average delay is at maximum (getting congested)
136*4882a593Smuzhiyun * then use small alpha (0.3)
137*4882a593Smuzhiyun *
138*4882a593Smuzhiyun * The result is a convex window growth curve.
139*4882a593Smuzhiyun */
alpha(struct illinois * ca,u32 da,u32 dm)140*4882a593Smuzhiyun static u32 alpha(struct illinois *ca, u32 da, u32 dm)
141*4882a593Smuzhiyun {
142*4882a593Smuzhiyun u32 d1 = dm / 100; /* Low threshold */
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun if (da <= d1) {
145*4882a593Smuzhiyun /* If never got out of low delay zone, then use max */
146*4882a593Smuzhiyun if (!ca->rtt_above)
147*4882a593Smuzhiyun return ALPHA_MAX;
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun /* Wait for 5 good RTT's before allowing alpha to go alpha max.
150*4882a593Smuzhiyun * This prevents one good RTT from causing sudden window increase.
151*4882a593Smuzhiyun */
152*4882a593Smuzhiyun if (++ca->rtt_low < theta)
153*4882a593Smuzhiyun return ca->alpha;
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun ca->rtt_low = 0;
156*4882a593Smuzhiyun ca->rtt_above = 0;
157*4882a593Smuzhiyun return ALPHA_MAX;
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun ca->rtt_above = 1;
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun /*
163*4882a593Smuzhiyun * Based on:
164*4882a593Smuzhiyun *
165*4882a593Smuzhiyun * (dm - d1) amin amax
166*4882a593Smuzhiyun * k1 = -------------------
167*4882a593Smuzhiyun * amax - amin
168*4882a593Smuzhiyun *
169*4882a593Smuzhiyun * (dm - d1) amin
170*4882a593Smuzhiyun * k2 = ---------------- - d1
171*4882a593Smuzhiyun * amax - amin
172*4882a593Smuzhiyun *
173*4882a593Smuzhiyun * k1
174*4882a593Smuzhiyun * alpha = ----------
175*4882a593Smuzhiyun * k2 + da
176*4882a593Smuzhiyun */
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun dm -= d1;
179*4882a593Smuzhiyun da -= d1;
180*4882a593Smuzhiyun return (dm * ALPHA_MAX) /
181*4882a593Smuzhiyun (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
182*4882a593Smuzhiyun }
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun /*
185*4882a593Smuzhiyun * Beta used for multiplicative decrease.
186*4882a593Smuzhiyun * For small window sizes returns same value as Reno (0.5)
187*4882a593Smuzhiyun *
188*4882a593Smuzhiyun * If delay is small (10% of max) then beta = 1/8
189*4882a593Smuzhiyun * If delay is up to 80% of max then beta = 1/2
190*4882a593Smuzhiyun * In between is a linear function
191*4882a593Smuzhiyun */
beta(u32 da,u32 dm)192*4882a593Smuzhiyun static u32 beta(u32 da, u32 dm)
193*4882a593Smuzhiyun {
194*4882a593Smuzhiyun u32 d2, d3;
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun d2 = dm / 10;
197*4882a593Smuzhiyun if (da <= d2)
198*4882a593Smuzhiyun return BETA_MIN;
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun d3 = (8 * dm) / 10;
201*4882a593Smuzhiyun if (da >= d3 || d3 <= d2)
202*4882a593Smuzhiyun return BETA_MAX;
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun /*
205*4882a593Smuzhiyun * Based on:
206*4882a593Smuzhiyun *
207*4882a593Smuzhiyun * bmin d3 - bmax d2
208*4882a593Smuzhiyun * k3 = -------------------
209*4882a593Smuzhiyun * d3 - d2
210*4882a593Smuzhiyun *
211*4882a593Smuzhiyun * bmax - bmin
212*4882a593Smuzhiyun * k4 = -------------
213*4882a593Smuzhiyun * d3 - d2
214*4882a593Smuzhiyun *
215*4882a593Smuzhiyun * b = k3 + k4 da
216*4882a593Smuzhiyun */
217*4882a593Smuzhiyun return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
218*4882a593Smuzhiyun / (d3 - d2);
219*4882a593Smuzhiyun }
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun /* Update alpha and beta values once per RTT */
update_params(struct sock * sk)222*4882a593Smuzhiyun static void update_params(struct sock *sk)
223*4882a593Smuzhiyun {
224*4882a593Smuzhiyun struct tcp_sock *tp = tcp_sk(sk);
225*4882a593Smuzhiyun struct illinois *ca = inet_csk_ca(sk);
226*4882a593Smuzhiyun
227*4882a593Smuzhiyun if (tp->snd_cwnd < win_thresh) {
228*4882a593Smuzhiyun ca->alpha = ALPHA_BASE;
229*4882a593Smuzhiyun ca->beta = BETA_BASE;
230*4882a593Smuzhiyun } else if (ca->cnt_rtt > 0) {
231*4882a593Smuzhiyun u32 dm = max_delay(ca);
232*4882a593Smuzhiyun u32 da = avg_delay(ca);
233*4882a593Smuzhiyun
234*4882a593Smuzhiyun ca->alpha = alpha(ca, da, dm);
235*4882a593Smuzhiyun ca->beta = beta(da, dm);
236*4882a593Smuzhiyun }
237*4882a593Smuzhiyun
238*4882a593Smuzhiyun rtt_reset(sk);
239*4882a593Smuzhiyun }
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun /*
242*4882a593Smuzhiyun * In case of loss, reset to default values
243*4882a593Smuzhiyun */
tcp_illinois_state(struct sock * sk,u8 new_state)244*4882a593Smuzhiyun static void tcp_illinois_state(struct sock *sk, u8 new_state)
245*4882a593Smuzhiyun {
246*4882a593Smuzhiyun struct illinois *ca = inet_csk_ca(sk);
247*4882a593Smuzhiyun
248*4882a593Smuzhiyun if (new_state == TCP_CA_Loss) {
249*4882a593Smuzhiyun ca->alpha = ALPHA_BASE;
250*4882a593Smuzhiyun ca->beta = BETA_BASE;
251*4882a593Smuzhiyun ca->rtt_low = 0;
252*4882a593Smuzhiyun ca->rtt_above = 0;
253*4882a593Smuzhiyun rtt_reset(sk);
254*4882a593Smuzhiyun }
255*4882a593Smuzhiyun }
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun /*
258*4882a593Smuzhiyun * Increase window in response to successful acknowledgment.
259*4882a593Smuzhiyun */
tcp_illinois_cong_avoid(struct sock * sk,u32 ack,u32 acked)260*4882a593Smuzhiyun static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
261*4882a593Smuzhiyun {
262*4882a593Smuzhiyun struct tcp_sock *tp = tcp_sk(sk);
263*4882a593Smuzhiyun struct illinois *ca = inet_csk_ca(sk);
264*4882a593Smuzhiyun
265*4882a593Smuzhiyun if (after(ack, ca->end_seq))
266*4882a593Smuzhiyun update_params(sk);
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun /* RFC2861 only increase cwnd if fully utilized */
269*4882a593Smuzhiyun if (!tcp_is_cwnd_limited(sk))
270*4882a593Smuzhiyun return;
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun /* In slow start */
273*4882a593Smuzhiyun if (tcp_in_slow_start(tp))
274*4882a593Smuzhiyun tcp_slow_start(tp, acked);
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun else {
277*4882a593Smuzhiyun u32 delta;
278*4882a593Smuzhiyun
279*4882a593Smuzhiyun /* snd_cwnd_cnt is # of packets since last cwnd increment */
280*4882a593Smuzhiyun tp->snd_cwnd_cnt += ca->acked;
281*4882a593Smuzhiyun ca->acked = 1;
282*4882a593Smuzhiyun
283*4882a593Smuzhiyun /* This is close approximation of:
284*4882a593Smuzhiyun * tp->snd_cwnd += alpha/tp->snd_cwnd
285*4882a593Smuzhiyun */
286*4882a593Smuzhiyun delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
287*4882a593Smuzhiyun if (delta >= tp->snd_cwnd) {
288*4882a593Smuzhiyun tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
289*4882a593Smuzhiyun (u32)tp->snd_cwnd_clamp);
290*4882a593Smuzhiyun tp->snd_cwnd_cnt = 0;
291*4882a593Smuzhiyun }
292*4882a593Smuzhiyun }
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun
tcp_illinois_ssthresh(struct sock * sk)295*4882a593Smuzhiyun static u32 tcp_illinois_ssthresh(struct sock *sk)
296*4882a593Smuzhiyun {
297*4882a593Smuzhiyun struct tcp_sock *tp = tcp_sk(sk);
298*4882a593Smuzhiyun struct illinois *ca = inet_csk_ca(sk);
299*4882a593Smuzhiyun
300*4882a593Smuzhiyun /* Multiplicative decrease */
301*4882a593Smuzhiyun return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
302*4882a593Smuzhiyun }
303*4882a593Smuzhiyun
304*4882a593Smuzhiyun /* Extract info for Tcp socket info provided via netlink. */
tcp_illinois_info(struct sock * sk,u32 ext,int * attr,union tcp_cc_info * info)305*4882a593Smuzhiyun static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
306*4882a593Smuzhiyun union tcp_cc_info *info)
307*4882a593Smuzhiyun {
308*4882a593Smuzhiyun const struct illinois *ca = inet_csk_ca(sk);
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
311*4882a593Smuzhiyun info->vegas.tcpv_enabled = 1;
312*4882a593Smuzhiyun info->vegas.tcpv_rttcnt = ca->cnt_rtt;
313*4882a593Smuzhiyun info->vegas.tcpv_minrtt = ca->base_rtt;
314*4882a593Smuzhiyun info->vegas.tcpv_rtt = 0;
315*4882a593Smuzhiyun
316*4882a593Smuzhiyun if (info->vegas.tcpv_rttcnt > 0) {
317*4882a593Smuzhiyun u64 t = ca->sum_rtt;
318*4882a593Smuzhiyun
319*4882a593Smuzhiyun do_div(t, info->vegas.tcpv_rttcnt);
320*4882a593Smuzhiyun info->vegas.tcpv_rtt = t;
321*4882a593Smuzhiyun }
322*4882a593Smuzhiyun *attr = INET_DIAG_VEGASINFO;
323*4882a593Smuzhiyun return sizeof(struct tcpvegas_info);
324*4882a593Smuzhiyun }
325*4882a593Smuzhiyun return 0;
326*4882a593Smuzhiyun }
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun static struct tcp_congestion_ops tcp_illinois __read_mostly = {
329*4882a593Smuzhiyun .init = tcp_illinois_init,
330*4882a593Smuzhiyun .ssthresh = tcp_illinois_ssthresh,
331*4882a593Smuzhiyun .undo_cwnd = tcp_reno_undo_cwnd,
332*4882a593Smuzhiyun .cong_avoid = tcp_illinois_cong_avoid,
333*4882a593Smuzhiyun .set_state = tcp_illinois_state,
334*4882a593Smuzhiyun .get_info = tcp_illinois_info,
335*4882a593Smuzhiyun .pkts_acked = tcp_illinois_acked,
336*4882a593Smuzhiyun
337*4882a593Smuzhiyun .owner = THIS_MODULE,
338*4882a593Smuzhiyun .name = "illinois",
339*4882a593Smuzhiyun };
340*4882a593Smuzhiyun
tcp_illinois_register(void)341*4882a593Smuzhiyun static int __init tcp_illinois_register(void)
342*4882a593Smuzhiyun {
343*4882a593Smuzhiyun BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
344*4882a593Smuzhiyun return tcp_register_congestion_control(&tcp_illinois);
345*4882a593Smuzhiyun }
346*4882a593Smuzhiyun
tcp_illinois_unregister(void)347*4882a593Smuzhiyun static void __exit tcp_illinois_unregister(void)
348*4882a593Smuzhiyun {
349*4882a593Smuzhiyun tcp_unregister_congestion_control(&tcp_illinois);
350*4882a593Smuzhiyun }
351*4882a593Smuzhiyun
352*4882a593Smuzhiyun module_init(tcp_illinois_register);
353*4882a593Smuzhiyun module_exit(tcp_illinois_unregister);
354*4882a593Smuzhiyun
355*4882a593Smuzhiyun MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
356*4882a593Smuzhiyun MODULE_LICENSE("GPL");
357*4882a593Smuzhiyun MODULE_DESCRIPTION("TCP Illinois");
358*4882a593Smuzhiyun MODULE_VERSION("1.0");
359