1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * H-TCP congestion control. The algorithm is detailed in:
4*4882a593Smuzhiyun * R.N.Shorten, D.J.Leith:
5*4882a593Smuzhiyun * "H-TCP: TCP for high-speed and long-distance networks"
6*4882a593Smuzhiyun * Proc. PFLDnet, Argonne, 2004.
7*4882a593Smuzhiyun * https://www.hamilton.ie/net/htcp3.pdf
8*4882a593Smuzhiyun */
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun #include <linux/mm.h>
11*4882a593Smuzhiyun #include <linux/module.h>
12*4882a593Smuzhiyun #include <net/tcp.h>
13*4882a593Smuzhiyun
/* Fixed-point constants: real values are scaled by 128, i.e. shifted << 7. */
#define ALPHA_BASE	(1 << 7)	/* 128 / 128 = 1.0 */
#define BETA_MIN	(1 << 6)	/*  64 / 128 = 0.5 */
#define BETA_MAX	102		/* 102 / 128 ~= 0.8 */
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun static int use_rtt_scaling __read_mostly = 1;
19*4882a593Smuzhiyun module_param(use_rtt_scaling, int, 0644);
20*4882a593Smuzhiyun MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun static int use_bandwidth_switch __read_mostly = 1;
23*4882a593Smuzhiyun module_param(use_bandwidth_switch, int, 0644);
24*4882a593Smuzhiyun MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun struct htcp {
27*4882a593Smuzhiyun u32 alpha; /* Fixed point arith, << 7 */
28*4882a593Smuzhiyun u8 beta; /* Fixed point arith, << 7 */
29*4882a593Smuzhiyun u8 modeswitch; /* Delay modeswitch
30*4882a593Smuzhiyun until we had at least one congestion event */
31*4882a593Smuzhiyun u16 pkts_acked;
32*4882a593Smuzhiyun u32 packetcount;
33*4882a593Smuzhiyun u32 minRTT;
34*4882a593Smuzhiyun u32 maxRTT;
35*4882a593Smuzhiyun u32 last_cong; /* Time since last congestion event end */
36*4882a593Smuzhiyun u32 undo_last_cong;
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun u32 undo_maxRTT;
39*4882a593Smuzhiyun u32 undo_old_maxB;
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun /* Bandwidth estimation */
42*4882a593Smuzhiyun u32 minB;
43*4882a593Smuzhiyun u32 maxB;
44*4882a593Smuzhiyun u32 old_maxB;
45*4882a593Smuzhiyun u32 Bi;
46*4882a593Smuzhiyun u32 lasttime;
47*4882a593Smuzhiyun };
48*4882a593Smuzhiyun
htcp_cong_time(const struct htcp * ca)49*4882a593Smuzhiyun static inline u32 htcp_cong_time(const struct htcp *ca)
50*4882a593Smuzhiyun {
51*4882a593Smuzhiyun return jiffies - ca->last_cong;
52*4882a593Smuzhiyun }
53*4882a593Smuzhiyun
htcp_ccount(const struct htcp * ca)54*4882a593Smuzhiyun static inline u32 htcp_ccount(const struct htcp *ca)
55*4882a593Smuzhiyun {
56*4882a593Smuzhiyun return htcp_cong_time(ca) / ca->minRTT;
57*4882a593Smuzhiyun }
58*4882a593Smuzhiyun
htcp_reset(struct htcp * ca)59*4882a593Smuzhiyun static inline void htcp_reset(struct htcp *ca)
60*4882a593Smuzhiyun {
61*4882a593Smuzhiyun ca->undo_last_cong = ca->last_cong;
62*4882a593Smuzhiyun ca->undo_maxRTT = ca->maxRTT;
63*4882a593Smuzhiyun ca->undo_old_maxB = ca->old_maxB;
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun ca->last_cong = jiffies;
66*4882a593Smuzhiyun }
67*4882a593Smuzhiyun
htcp_cwnd_undo(struct sock * sk)68*4882a593Smuzhiyun static u32 htcp_cwnd_undo(struct sock *sk)
69*4882a593Smuzhiyun {
70*4882a593Smuzhiyun struct htcp *ca = inet_csk_ca(sk);
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun if (ca->undo_last_cong) {
73*4882a593Smuzhiyun ca->last_cong = ca->undo_last_cong;
74*4882a593Smuzhiyun ca->maxRTT = ca->undo_maxRTT;
75*4882a593Smuzhiyun ca->old_maxB = ca->undo_old_maxB;
76*4882a593Smuzhiyun ca->undo_last_cong = 0;
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun
79*4882a593Smuzhiyun return tcp_reno_undo_cwnd(sk);
80*4882a593Smuzhiyun }
81*4882a593Smuzhiyun
/*
 * Fold one RTT sample (in jiffies) into the minRTT / maxRTT trackers.
 *
 * minRTT is updated in every congestion state; maxRTT only while the
 * connection is in TCP_CA_Open.
 */
static inline void measure_rtt(struct sock *sk, u32 srtt)
{
	struct htcp *ca = inet_csk_ca(sk);

	/* minRTT starts out as zero, meaning "no sample yet". */
	if (!ca->minRTT || srtt < ca->minRTT)
		ca->minRTT = srtt;

	if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
		return;

	/* maxRTT is never allowed to sit below minRTT. */
	if (ca->maxRTT < ca->minRTT)
		ca->maxRTT = ca->minRTT;

	/* Only accept a new maximum that is at most 20 ms above the old one. */
	if (srtt > ca->maxRTT &&
	    srtt <= ca->maxRTT + msecs_to_jiffies(20))
		ca->maxRTT = srtt;
}
100*4882a593Smuzhiyun
measure_achieved_throughput(struct sock * sk,const struct ack_sample * sample)101*4882a593Smuzhiyun static void measure_achieved_throughput(struct sock *sk,
102*4882a593Smuzhiyun const struct ack_sample *sample)
103*4882a593Smuzhiyun {
104*4882a593Smuzhiyun const struct inet_connection_sock *icsk = inet_csk(sk);
105*4882a593Smuzhiyun const struct tcp_sock *tp = tcp_sk(sk);
106*4882a593Smuzhiyun struct htcp *ca = inet_csk_ca(sk);
107*4882a593Smuzhiyun u32 now = tcp_jiffies32;
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun if (icsk->icsk_ca_state == TCP_CA_Open)
110*4882a593Smuzhiyun ca->pkts_acked = sample->pkts_acked;
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun if (sample->rtt_us > 0)
113*4882a593Smuzhiyun measure_rtt(sk, usecs_to_jiffies(sample->rtt_us));
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun if (!use_bandwidth_switch)
116*4882a593Smuzhiyun return;
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun /* achieved throughput calculations */
119*4882a593Smuzhiyun if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_Disorder))) {
120*4882a593Smuzhiyun ca->packetcount = 0;
121*4882a593Smuzhiyun ca->lasttime = now;
122*4882a593Smuzhiyun return;
123*4882a593Smuzhiyun }
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun ca->packetcount += sample->pkts_acked;
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
128*4882a593Smuzhiyun now - ca->lasttime >= ca->minRTT &&
129*4882a593Smuzhiyun ca->minRTT > 0) {
130*4882a593Smuzhiyun __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun if (htcp_ccount(ca) <= 3) {
133*4882a593Smuzhiyun /* just after backoff */
134*4882a593Smuzhiyun ca->minB = ca->maxB = ca->Bi = cur_Bi;
135*4882a593Smuzhiyun } else {
136*4882a593Smuzhiyun ca->Bi = (3 * ca->Bi + cur_Bi) / 4;
137*4882a593Smuzhiyun if (ca->Bi > ca->maxB)
138*4882a593Smuzhiyun ca->maxB = ca->Bi;
139*4882a593Smuzhiyun if (ca->minB > ca->maxB)
140*4882a593Smuzhiyun ca->minB = ca->maxB;
141*4882a593Smuzhiyun }
142*4882a593Smuzhiyun ca->packetcount = 0;
143*4882a593Smuzhiyun ca->lasttime = now;
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun }
146*4882a593Smuzhiyun
/*
 * Recompute the backoff factor beta (fixed point << 7).
 *
 * @ca:     H-TCP state to update
 * @minRTT: minimum RTT in jiffies
 * @maxRTT: maximum RTT in jiffies
 *
 * With use_bandwidth_switch set, old_maxB is refreshed from maxB and, if
 * 5 * maxB is not between 4 * old_maxB and 6 * old_maxB (i.e. roughly a
 * change of more than 20%, as judged by between(), which is defined outside
 * this file), beta falls back to BETA_MIN and modeswitch is cleared.
 *
 * Otherwise beta becomes minRTT / maxRTT clamped to [BETA_MIN, BETA_MAX],
 * but only once modeswitch is set, minRTT exceeds 10 ms and maxRTT is
 * non-zero; if not, beta is BETA_MIN and modeswitch is set for next time.
 */
static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
{
	if (use_bandwidth_switch) {
		u32 maxB = ca->maxB;
		u32 old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;
		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}

	if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) {
		/*
		 * Clamp in 32 bits before storing: ca->beta is a u8, so
		 * assigning the raw quotient first would truncate any value
		 * above 255 and could turn a large ratio into a small beta.
		 */
		u32 beta = (minRTT << 7) / maxRTT;

		if (beta < BETA_MIN)
			beta = BETA_MIN;
		else if (beta > BETA_MAX)
			beta = BETA_MAX;
		ca->beta = beta;
	} else {
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}
172*4882a593Smuzhiyun
htcp_alpha_update(struct htcp * ca)173*4882a593Smuzhiyun static inline void htcp_alpha_update(struct htcp *ca)
174*4882a593Smuzhiyun {
175*4882a593Smuzhiyun u32 minRTT = ca->minRTT;
176*4882a593Smuzhiyun u32 factor = 1;
177*4882a593Smuzhiyun u32 diff = htcp_cong_time(ca);
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun if (diff > HZ) {
180*4882a593Smuzhiyun diff -= HZ;
181*4882a593Smuzhiyun factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / HZ)) / HZ;
182*4882a593Smuzhiyun }
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun if (use_rtt_scaling && minRTT) {
185*4882a593Smuzhiyun u32 scale = (HZ << 3) / (10 * minRTT);
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun /* clamping ratio to interval [0.5,10]<<3 */
188*4882a593Smuzhiyun scale = min(max(scale, 1U << 2), 10U << 3);
189*4882a593Smuzhiyun factor = (factor << 3) / scale;
190*4882a593Smuzhiyun if (!factor)
191*4882a593Smuzhiyun factor = 1;
192*4882a593Smuzhiyun }
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
195*4882a593Smuzhiyun if (!ca->alpha)
196*4882a593Smuzhiyun ca->alpha = ALPHA_BASE;
197*4882a593Smuzhiyun }
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun /*
200*4882a593Smuzhiyun * After we have the rtt data to calculate beta, we'd still prefer to wait one
201*4882a593Smuzhiyun * rtt before we adjust our beta to ensure we are working from a consistent
202*4882a593Smuzhiyun * data.
203*4882a593Smuzhiyun *
204*4882a593Smuzhiyun * This function should be called when we hit a congestion event since only at
205*4882a593Smuzhiyun * that point do we really have a real sense of maxRTT (the queues en route
206*4882a593Smuzhiyun * were getting just too full now).
207*4882a593Smuzhiyun */
htcp_param_update(struct sock * sk)208*4882a593Smuzhiyun static void htcp_param_update(struct sock *sk)
209*4882a593Smuzhiyun {
210*4882a593Smuzhiyun struct htcp *ca = inet_csk_ca(sk);
211*4882a593Smuzhiyun u32 minRTT = ca->minRTT;
212*4882a593Smuzhiyun u32 maxRTT = ca->maxRTT;
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun htcp_beta_update(ca, minRTT, maxRTT);
215*4882a593Smuzhiyun htcp_alpha_update(ca);
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun /* add slowly fading memory for maxRTT to accommodate routing changes */
218*4882a593Smuzhiyun if (minRTT > 0 && maxRTT > minRTT)
219*4882a593Smuzhiyun ca->maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
220*4882a593Smuzhiyun }
221*4882a593Smuzhiyun
htcp_recalc_ssthresh(struct sock * sk)222*4882a593Smuzhiyun static u32 htcp_recalc_ssthresh(struct sock *sk)
223*4882a593Smuzhiyun {
224*4882a593Smuzhiyun const struct tcp_sock *tp = tcp_sk(sk);
225*4882a593Smuzhiyun const struct htcp *ca = inet_csk_ca(sk);
226*4882a593Smuzhiyun
227*4882a593Smuzhiyun htcp_param_update(sk);
228*4882a593Smuzhiyun return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
229*4882a593Smuzhiyun }
230*4882a593Smuzhiyun
/*
 * .cong_avoid hook: grow the congestion window.
 *
 * Does nothing unless the connection is cwnd-limited. In slow start the
 * work is delegated to tcp_slow_start(); otherwise snd_cwnd grows by one
 * each time snd_cwnd_cnt, weighted by alpha, reaches snd_cwnd.
 */
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct htcp *ca = inet_csk_ca(sk);

	if (!tcp_is_cwnd_limited(sk))
		return;

	if (tcp_in_slow_start(tp)) {
		tcp_slow_start(tp, acked);
		return;
	}

	/* In dangerous area, increase slowly.
	 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
	 */
	if ((tp->snd_cwnd_cnt * ca->alpha) >> 7 < tp->snd_cwnd) {
		/* Not enough credit yet: bank the acked packets. */
		tp->snd_cwnd_cnt += ca->pkts_acked;
	} else {
		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
		tp->snd_cwnd_cnt = 0;
		htcp_alpha_update(ca);
	}

	ca->pkts_acked = 1;
}
256*4882a593Smuzhiyun
htcp_init(struct sock * sk)257*4882a593Smuzhiyun static void htcp_init(struct sock *sk)
258*4882a593Smuzhiyun {
259*4882a593Smuzhiyun struct htcp *ca = inet_csk_ca(sk);
260*4882a593Smuzhiyun
261*4882a593Smuzhiyun memset(ca, 0, sizeof(struct htcp));
262*4882a593Smuzhiyun ca->alpha = ALPHA_BASE;
263*4882a593Smuzhiyun ca->beta = BETA_MIN;
264*4882a593Smuzhiyun ca->pkts_acked = 1;
265*4882a593Smuzhiyun ca->last_cong = jiffies;
266*4882a593Smuzhiyun }
267*4882a593Smuzhiyun
/*
 * .set_state hook.
 *
 * Entering TCP_CA_Open with an undo snapshot still pending restarts the
 * congestion epoch and discards the snapshot. Entering CWR, Recovery or
 * Loss starts a new epoch via htcp_reset(). Other states are ignored.
 */
static void htcp_state(struct sock *sk, u8 new_state)
{
	if (new_state == TCP_CA_Open) {
		struct htcp *ca = inet_csk_ca(sk);

		if (ca->undo_last_cong) {
			ca->last_cong = jiffies;
			ca->undo_last_cong = 0;
		}
		return;
	}

	if (new_state == TCP_CA_CWR ||
	    new_state == TCP_CA_Recovery ||
	    new_state == TCP_CA_Loss)
		htcp_reset(inet_csk_ca(sk));
}
288*4882a593Smuzhiyun
289*4882a593Smuzhiyun static struct tcp_congestion_ops htcp __read_mostly = {
290*4882a593Smuzhiyun .init = htcp_init,
291*4882a593Smuzhiyun .ssthresh = htcp_recalc_ssthresh,
292*4882a593Smuzhiyun .cong_avoid = htcp_cong_avoid,
293*4882a593Smuzhiyun .set_state = htcp_state,
294*4882a593Smuzhiyun .undo_cwnd = htcp_cwnd_undo,
295*4882a593Smuzhiyun .pkts_acked = measure_achieved_throughput,
296*4882a593Smuzhiyun .owner = THIS_MODULE,
297*4882a593Smuzhiyun .name = "htcp",
298*4882a593Smuzhiyun };
299*4882a593Smuzhiyun
htcp_register(void)300*4882a593Smuzhiyun static int __init htcp_register(void)
301*4882a593Smuzhiyun {
302*4882a593Smuzhiyun BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
303*4882a593Smuzhiyun BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
304*4882a593Smuzhiyun return tcp_register_congestion_control(&htcp);
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun
htcp_unregister(void)307*4882a593Smuzhiyun static void __exit htcp_unregister(void)
308*4882a593Smuzhiyun {
309*4882a593Smuzhiyun tcp_unregister_congestion_control(&htcp);
310*4882a593Smuzhiyun }
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun module_init(htcp_register);
313*4882a593Smuzhiyun module_exit(htcp_unregister);
314*4882a593Smuzhiyun
315*4882a593Smuzhiyun MODULE_AUTHOR("Baruch Even");
316*4882a593Smuzhiyun MODULE_LICENSE("GPL");
317*4882a593Smuzhiyun MODULE_DESCRIPTION("H-TCP");
318