xref: /OK3568_Linux_fs/kernel/net/ipv4/tcp_yeah.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  *   YeAH TCP
5*4882a593Smuzhiyun  *
6*4882a593Smuzhiyun  * For further details look at:
7*4882a593Smuzhiyun  *   https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  */
10*4882a593Smuzhiyun #include <linux/mm.h>
11*4882a593Smuzhiyun #include <linux/module.h>
12*4882a593Smuzhiyun #include <linux/skbuff.h>
13*4882a593Smuzhiyun #include <linux/inet_diag.h>
14*4882a593Smuzhiyun 
15*4882a593Smuzhiyun #include <net/tcp.h>
16*4882a593Smuzhiyun 
17*4882a593Smuzhiyun #include "tcp_vegas.h"
18*4882a593Smuzhiyun 
/* YeAH tuning constants (see the YeAH-TCP paper, section 3). */
#define TCP_YEAH_ALPHA       80 /* number of packets queued at the bottleneck */
#define TCP_YEAH_GAMMA        1 /* fraction of queue to be removed per rtt */
#define TCP_YEAH_DELTA        3 /* log minimum fraction of cwnd to be removed on loss */
#define TCP_YEAH_EPSILON      1 /* log maximum fraction to be removed on early decongestion */
#define TCP_YEAH_PHY          8 /* maximum delta from base */
#define TCP_YEAH_RHO         16 /* minimum number of consecutive rtt to consider competition on loss */
#define TCP_YEAH_ZETA        50 /* minimum number of state switches to reset reno_count */

/* Additive-increase cap borrowed from Scalable TCP. */
#define TCP_SCALABLE_AI_CNT	 100U
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun /* YeAH variables */
/* YeAH per-socket private state.  Embeds the Vegas state so the shared
 * Vegas helpers (RTT sampling, init, state/cwnd-event hooks) can be
 * reused directly on this congestion-control block.
 */
struct yeah {
	struct vegas vegas;	/* must be first */

	/* YeAH */
	u32 lastQ;		/* queue backlog estimate from the last completed RTT */
	u32 doing_reno_now;	/* consecutive RTTs spent in Reno ("slow") mode; 0 in fast mode */

	u32 reno_count;		/* cwnd floor modelling a competing Reno flow's share */
	u32 fast_count;		/* consecutive "fast" RTTs since reno_count was last reset */
};
40*4882a593Smuzhiyun 
tcp_yeah_init(struct sock * sk)41*4882a593Smuzhiyun static void tcp_yeah_init(struct sock *sk)
42*4882a593Smuzhiyun {
43*4882a593Smuzhiyun 	struct tcp_sock *tp = tcp_sk(sk);
44*4882a593Smuzhiyun 	struct yeah *yeah = inet_csk_ca(sk);
45*4882a593Smuzhiyun 
46*4882a593Smuzhiyun 	tcp_vegas_init(sk);
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun 	yeah->doing_reno_now = 0;
49*4882a593Smuzhiyun 	yeah->lastQ = 0;
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun 	yeah->reno_count = 2;
52*4882a593Smuzhiyun 
53*4882a593Smuzhiyun 	/* Ensure the MD arithmetic works.  This is somewhat pedantic,
54*4882a593Smuzhiyun 	 * since I don't think we will see a cwnd this large. :) */
55*4882a593Smuzhiyun 	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
56*4882a593Smuzhiyun }
57*4882a593Smuzhiyun 
/* Main congestion-avoidance hook.
 *
 * Grows cwnd per ACK (slow start, then Scalable-style AI in "fast" mode
 * or Reno AI in "reno" mode), and once per RTT runs the Vegas-style
 * queue estimation that switches between the two modes and performs
 * early ("precautionary") decongestion.
 */
static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct yeah *yeah = inet_csk_ca(sk);

	if (!tcp_is_cwnd_limited(sk))
		return;

	if (tcp_in_slow_start(tp)) {
		acked = tcp_slow_start(tp, acked);
		if (!acked)
			goto do_vegas;
	}

	if (!yeah->doing_reno_now) {
		/* Scalable */
		tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
				  acked);
	} else {
		/* Reno */
		tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
	}

	/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
	 *
	 * These are so named because they represent the approximate values
	 * of snd_una and snd_nxt at the beginning of the current RTT. More
	 * precisely, they represent the amount of data sent during the RTT.
	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
	 * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding
	 * bytes of data have been ACKed during the course of the RTT, giving
	 * an "actual" rate of:
	 *
	 *     (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration)
	 *
	 * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una,
	 * because delayed ACKs can cover more than one segment, so they
	 * don't line up yeahly with the boundaries of RTTs.
	 *
	 * Another unfortunate fact of life is that delayed ACKs delay the
	 * advance of the left edge of our send window, so that the number
	 * of bytes we send in an RTT is often less than our cwnd will allow.
	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
	 */
do_vegas:
	if (after(ack, yeah->vegas.beg_snd_nxt)) {
		/* We do the Vegas calculations only if we got enough RTT
		 * samples that we can be reasonably sure that we got
		 * at least one RTT sample that wasn't from a delayed ACK.
		 * If we only had 2 samples total,
		 * then that means we're getting only 1 ACK per RTT, which
		 * means they're almost certainly delayed ACKs.
		 * If  we have 3 samples, we should be OK.
		 */

		if (yeah->vegas.cntRTT > 2) {
			u32 rtt, queue;
			u64 bw;

			/* We have enough RTT samples, so, using the Vegas
			 * algorithm, we determine if we should increase or
			 * decrease cwnd, and by how much.
			 */

			/* Pluck out the RTT we are using for the Vegas
			 * calculations. This is the min RTT seen during the
			 * last RTT. Taking the min filters out the effects
			 * of delayed ACKs, at the cost of noticing congestion
			 * a bit later.
			 */
			rtt = yeah->vegas.minRTT;

			/* Compute excess number of packets above bandwidth
			 * Avoid doing full 64 bit divide.
			 * queue = cwnd * (rtt - baseRTT) / rtt, i.e. the
			 * estimated number of our packets sitting in the
			 * bottleneck queue.
			 */
			bw = tp->snd_cwnd;
			bw *= rtt - yeah->vegas.baseRTT;
			do_div(bw, rtt);
			queue = bw;

			/* Backlog above ALPHA packets, or RTT inflated by
			 * more than baseRTT/PHY: the path is congesting.
			 */
			if (queue > TCP_YEAH_ALPHA ||
			    rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
				if (queue > TCP_YEAH_ALPHA &&
				    tp->snd_cwnd > yeah->reno_count) {
					/* Precautionary decongestion: drain up
					 * to queue/GAMMA packets, capped at
					 * cwnd >> EPSILON, never below the
					 * estimated Reno-fair share.
					 */
					u32 reduction = min(queue / TCP_YEAH_GAMMA ,
							    tp->snd_cwnd >> TCP_YEAH_EPSILON);

					tp->snd_cwnd -= reduction;

					tp->snd_cwnd = max(tp->snd_cwnd,
							   yeah->reno_count);

					tp->snd_ssthresh = tp->snd_cwnd;
				}

				if (yeah->reno_count <= 2)
					yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
				else
					yeah->reno_count++;

				/* Count congested RTTs; saturate to avoid wrap. */
				yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
							   0xffffffU);
			} else {
				/* "Fast" RTT: after ZETA of them in a row,
				 * assume no competing Reno flow remains.
				 */
				yeah->fast_count++;

				if (yeah->fast_count > TCP_YEAH_ZETA) {
					yeah->reno_count = 2;
					yeah->fast_count = 0;
				}

				yeah->doing_reno_now = 0;
			}

			yeah->lastQ = queue;
		}

		/* Save the extent of the current window so we can use this
		 * at the end of the next RTT.
		 */
		yeah->vegas.beg_snd_una  = yeah->vegas.beg_snd_nxt;
		yeah->vegas.beg_snd_nxt  = tp->snd_nxt;
		yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;

		/* Wipe the slate clean for the next RTT. */
		yeah->vegas.cntRTT = 0;
		yeah->vegas.minRTT = 0x7fffffff;
	}
}
186*4882a593Smuzhiyun 
/* Slow-start threshold on loss.
 *
 * If we have seen fewer than RHO consecutive congested RTTs, the loss is
 * attributed to our own queue backlog: reduce by roughly the measured
 * backlog (clamped between cwnd >> DELTA and cwnd/2).  Otherwise assume
 * we are competing with loss-based (Reno) flows and halve cwnd as Reno
 * would.  Never returns less than 2.
 */
static u32 tcp_yeah_ssthresh(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct yeah *yeah = inet_csk_ca(sk);
	u32 reduction;

	if (yeah->doing_reno_now < TCP_YEAH_RHO) {
		reduction = yeah->lastQ;

		reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));

		reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
	} else
		reduction = max(tp->snd_cwnd>>1, 2U);

	yeah->fast_count = 0;
	yeah->reno_count = max(yeah->reno_count>>1, 2U);

	return max_t(int, tp->snd_cwnd - reduction, 2);
}
207*4882a593Smuzhiyun 
/* YeAH congestion-control ops.  Only init, ssthresh and cong_avoid are
 * YeAH-specific; the state, cwnd-event, info and pkts_acked hooks are
 * reused straight from Vegas (struct vegas is first in struct yeah).
 */
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
	.init		= tcp_yeah_init,
	.ssthresh	= tcp_yeah_ssthresh,
	.undo_cwnd      = tcp_reno_undo_cwnd,
	.cong_avoid	= tcp_yeah_cong_avoid,
	.set_state	= tcp_vegas_state,
	.cwnd_event	= tcp_vegas_cwnd_event,
	.get_info	= tcp_vegas_get_info,
	.pkts_acked	= tcp_vegas_pkts_acked,

	.owner		= THIS_MODULE,
	.name		= "yeah",
};
221*4882a593Smuzhiyun 
tcp_yeah_register(void)222*4882a593Smuzhiyun static int __init tcp_yeah_register(void)
223*4882a593Smuzhiyun {
224*4882a593Smuzhiyun 	BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
225*4882a593Smuzhiyun 	tcp_register_congestion_control(&tcp_yeah);
226*4882a593Smuzhiyun 	return 0;
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun 
/* Module exit: remove the algorithm from the TCP core's registry. */
static void __exit tcp_yeah_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_yeah);
}
233*4882a593Smuzhiyun 
/* Module entry/exit points and metadata. */
module_init(tcp_yeah_register);
module_exit(tcp_yeah_unregister);

MODULE_AUTHOR("Angelo P. Castellani");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("YeAH TCP");
240