// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 *   YeAH TCP
 *
 * For further details look at:
 *   https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
 *
 */
10*4882a593Smuzhiyun #include <linux/mm.h>
11*4882a593Smuzhiyun #include <linux/module.h>
12*4882a593Smuzhiyun #include <linux/skbuff.h>
13*4882a593Smuzhiyun #include <linux/inet_diag.h>
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #include <net/tcp.h>
16*4882a593Smuzhiyun
17*4882a593Smuzhiyun #include "tcp_vegas.h"
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun #define TCP_YEAH_ALPHA 80 /* number of packets queued at the bottleneck */
20*4882a593Smuzhiyun #define TCP_YEAH_GAMMA 1 /* fraction of queue to be removed per rtt */
21*4882a593Smuzhiyun #define TCP_YEAH_DELTA 3 /* log minimum fraction of cwnd to be removed on loss */
22*4882a593Smuzhiyun #define TCP_YEAH_EPSILON 1 /* log maximum fraction to be removed on early decongestion */
23*4882a593Smuzhiyun #define TCP_YEAH_PHY 8 /* maximum delta from base */
24*4882a593Smuzhiyun #define TCP_YEAH_RHO 16 /* minimum number of consecutive rtt to consider competition on loss */
25*4882a593Smuzhiyun #define TCP_YEAH_ZETA 50 /* minimum number of state switches to reset reno_count */
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun #define TCP_SCALABLE_AI_CNT 100U
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun /* YeAH variables */
30*4882a593Smuzhiyun struct yeah {
31*4882a593Smuzhiyun struct vegas vegas; /* must be first */
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun /* YeAH */
34*4882a593Smuzhiyun u32 lastQ;
35*4882a593Smuzhiyun u32 doing_reno_now;
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun u32 reno_count;
38*4882a593Smuzhiyun u32 fast_count;
39*4882a593Smuzhiyun };
40*4882a593Smuzhiyun
tcp_yeah_init(struct sock * sk)41*4882a593Smuzhiyun static void tcp_yeah_init(struct sock *sk)
42*4882a593Smuzhiyun {
43*4882a593Smuzhiyun struct tcp_sock *tp = tcp_sk(sk);
44*4882a593Smuzhiyun struct yeah *yeah = inet_csk_ca(sk);
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun tcp_vegas_init(sk);
47*4882a593Smuzhiyun
48*4882a593Smuzhiyun yeah->doing_reno_now = 0;
49*4882a593Smuzhiyun yeah->lastQ = 0;
50*4882a593Smuzhiyun
51*4882a593Smuzhiyun yeah->reno_count = 2;
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun /* Ensure the MD arithmetic works. This is somewhat pedantic,
54*4882a593Smuzhiyun * since I don't think we will see a cwnd this large. :) */
55*4882a593Smuzhiyun tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
56*4882a593Smuzhiyun }
57*4882a593Smuzhiyun
/* Main congestion-avoidance hook.
 *
 * Growth: slow start, then Scalable-style AI (fast mode) or Reno AI
 * (when competing flows were detected).  Once per RTT, a Vegas-style
 * estimate of the packets queued at the bottleneck decides whether to
 * drain the queue (precautionary decongestion) and whether to switch
 * between fast and Reno modes.
 */
static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct yeah *yeah = inet_csk_ca(sk);

	if (!tcp_is_cwnd_limited(sk))
		return;

	if (tcp_in_slow_start(tp)) {
		acked = tcp_slow_start(tp, acked);
		if (!acked)
			goto do_vegas;
	}

	if (!yeah->doing_reno_now) {
		/* Scalable */
		tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
				  acked);
	} else {
		/* Reno */
		tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
	}

	/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
	 *
	 * These are so named because they represent the approximate values
	 * of snd_una and snd_nxt at the beginning of the current RTT. More
	 * precisely, they represent the amount of data sent during the RTT.
	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
	 * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding
	 * bytes of data have been ACKed during the course of the RTT, giving
	 * an "actual" rate of:
	 *
	 *     (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration)
	 *
	 * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una,
	 * because delayed ACKs can cover more than one segment, so they
	 * don't line up yeahly with the boundaries of RTTs.
	 *
	 * Another unfortunate fact of life is that delayed ACKs delay the
	 * advance of the left edge of our send window, so that the number
	 * of bytes we send in an RTT is often less than our cwnd will allow.
	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
	 */
do_vegas:
	if (after(ack, yeah->vegas.beg_snd_nxt)) {
		/* We do the Vegas calculations only if we got enough RTT
		 * samples that we can be reasonably sure that we got
		 * at least one RTT sample that wasn't from a delayed ACK.
		 * If we only had 2 samples total,
		 * then that means we're getting only 1 ACK per RTT, which
		 * means they're almost certainly delayed ACKs.
		 * If  we have 3 samples, we should be OK.
		 */

		if (yeah->vegas.cntRTT > 2) {
			u32 rtt, queue;
			u64 bw;

			/* We have enough RTT samples, so, using the Vegas
			 * algorithm, we determine if we should increase or
			 * decrease cwnd, and by how much.
			 */

			/* Pluck out the RTT we are using for the Vegas
			 * calculations. This is the min RTT seen during the
			 * last RTT. Taking the min filters out the effects
			 * of delayed ACKs, at the cost of noticing congestion
			 * a bit later.
			 */
			rtt = yeah->vegas.minRTT;

			/* Compute excess number of packets above bandwidth
			 * Avoid doing full 64 bit divide.
			 */
			bw = tp->snd_cwnd;
			bw *= rtt - yeah->vegas.baseRTT;
			do_div(bw, rtt);
			queue = bw;

			if (queue > TCP_YEAH_ALPHA ||
			    rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
				if (queue > TCP_YEAH_ALPHA &&
				    tp->snd_cwnd > yeah->reno_count) {
					/* Precautionary decongestion: drain part of
					 * the estimated queue, but never below the
					 * reno_count floor.
					 */
					u32 reduction = min(queue / TCP_YEAH_GAMMA ,
							    tp->snd_cwnd >> TCP_YEAH_EPSILON);

					tp->snd_cwnd -= reduction;

					tp->snd_cwnd = max(tp->snd_cwnd,
							   yeah->reno_count);

					tp->snd_ssthresh = tp->snd_cwnd;
				}

				if (yeah->reno_count <= 2)
					yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
				else
					yeah->reno_count++;

				/* Saturating counter of consecutive congested RTTs;
				 * compared against TCP_YEAH_RHO in ssthresh().
				 */
				yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
							   0xffffffU);
			} else {
				yeah->fast_count++;

				if (yeah->fast_count > TCP_YEAH_ZETA) {
					yeah->reno_count = 2;
					yeah->fast_count = 0;
				}

				yeah->doing_reno_now = 0;
			}

			yeah->lastQ = queue;
		}

		/* Save the extent of the current window so we can use this
		 * at the end of the next RTT.
		 */
		yeah->vegas.beg_snd_una  = yeah->vegas.beg_snd_nxt;
		yeah->vegas.beg_snd_nxt  = tp->snd_nxt;
		yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;

		/* Wipe the slate clean for the next RTT. */
		yeah->vegas.cntRTT = 0;
		yeah->vegas.minRTT = 0x7fffffff;
	}
}
186*4882a593Smuzhiyun
tcp_yeah_ssthresh(struct sock * sk)187*4882a593Smuzhiyun static u32 tcp_yeah_ssthresh(struct sock *sk)
188*4882a593Smuzhiyun {
189*4882a593Smuzhiyun const struct tcp_sock *tp = tcp_sk(sk);
190*4882a593Smuzhiyun struct yeah *yeah = inet_csk_ca(sk);
191*4882a593Smuzhiyun u32 reduction;
192*4882a593Smuzhiyun
193*4882a593Smuzhiyun if (yeah->doing_reno_now < TCP_YEAH_RHO) {
194*4882a593Smuzhiyun reduction = yeah->lastQ;
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
197*4882a593Smuzhiyun
198*4882a593Smuzhiyun reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
199*4882a593Smuzhiyun } else
200*4882a593Smuzhiyun reduction = max(tp->snd_cwnd>>1, 2U);
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun yeah->fast_count = 0;
203*4882a593Smuzhiyun yeah->reno_count = max(yeah->reno_count>>1, 2U);
204*4882a593Smuzhiyun
205*4882a593Smuzhiyun return max_t(int, tp->snd_cwnd - reduction, 2);
206*4882a593Smuzhiyun }
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun static struct tcp_congestion_ops tcp_yeah __read_mostly = {
209*4882a593Smuzhiyun .init = tcp_yeah_init,
210*4882a593Smuzhiyun .ssthresh = tcp_yeah_ssthresh,
211*4882a593Smuzhiyun .undo_cwnd = tcp_reno_undo_cwnd,
212*4882a593Smuzhiyun .cong_avoid = tcp_yeah_cong_avoid,
213*4882a593Smuzhiyun .set_state = tcp_vegas_state,
214*4882a593Smuzhiyun .cwnd_event = tcp_vegas_cwnd_event,
215*4882a593Smuzhiyun .get_info = tcp_vegas_get_info,
216*4882a593Smuzhiyun .pkts_acked = tcp_vegas_pkts_acked,
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun .owner = THIS_MODULE,
219*4882a593Smuzhiyun .name = "yeah",
220*4882a593Smuzhiyun };
221*4882a593Smuzhiyun
tcp_yeah_register(void)222*4882a593Smuzhiyun static int __init tcp_yeah_register(void)
223*4882a593Smuzhiyun {
224*4882a593Smuzhiyun BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
225*4882a593Smuzhiyun tcp_register_congestion_control(&tcp_yeah);
226*4882a593Smuzhiyun return 0;
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun
tcp_yeah_unregister(void)229*4882a593Smuzhiyun static void __exit tcp_yeah_unregister(void)
230*4882a593Smuzhiyun {
231*4882a593Smuzhiyun tcp_unregister_congestion_control(&tcp_yeah);
232*4882a593Smuzhiyun }
233*4882a593Smuzhiyun
234*4882a593Smuzhiyun module_init(tcp_yeah_register);
235*4882a593Smuzhiyun module_exit(tcp_yeah_unregister);
236*4882a593Smuzhiyun
237*4882a593Smuzhiyun MODULE_AUTHOR("Angelo P. Castellani");
238*4882a593Smuzhiyun MODULE_LICENSE("GPL");
239*4882a593Smuzhiyun MODULE_DESCRIPTION("YeAH TCP");
240