xref: /OK3568_Linux_fs/kernel/samples/bpf/hbm_kern.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright (c) 2019 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Include file for sample Host Bandwidth Manager (HBM) BPF programs
 */
11*4882a593Smuzhiyun #define KBUILD_MODNAME "foo"
12*4882a593Smuzhiyun #include <stddef.h>
13*4882a593Smuzhiyun #include <stdbool.h>
14*4882a593Smuzhiyun #include <uapi/linux/bpf.h>
15*4882a593Smuzhiyun #include <uapi/linux/if_ether.h>
16*4882a593Smuzhiyun #include <uapi/linux/if_packet.h>
17*4882a593Smuzhiyun #include <uapi/linux/ip.h>
18*4882a593Smuzhiyun #include <uapi/linux/ipv6.h>
19*4882a593Smuzhiyun #include <uapi/linux/in.h>
20*4882a593Smuzhiyun #include <uapi/linux/tcp.h>
21*4882a593Smuzhiyun #include <uapi/linux/filter.h>
22*4882a593Smuzhiyun #include <uapi/linux/pkt_cls.h>
23*4882a593Smuzhiyun #include <net/ipv6.h>
24*4882a593Smuzhiyun #include <net/inet_ecn.h>
25*4882a593Smuzhiyun #include <bpf/bpf_endian.h>
26*4882a593Smuzhiyun #include <bpf/bpf_helpers.h>
27*4882a593Smuzhiyun #include "hbm.h"
28*4882a593Smuzhiyun 
// Packet verdict values; hbm_update_stats() starts from ALLOW_PKT and
// switches to DROP_PKT when its drop flag is set.
#define DROP_PKT	0
#define ALLOW_PKT	1
// Flag values (not referenced elsewhere in this header).
#define TCP_ECN_OK	1
#define CWR		2
33*4882a593Smuzhiyun 
// Debug output is opt-in: unless HBM_DEBUG is defined, bpf_printk() is
// replaced by a macro that expands to nothing, so its arguments are
// never evaluated.
#ifndef HBM_DEBUG
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
38*4882a593Smuzhiyun 
// Credit-based thresholds, all expressed as multiples of
// MAX_BYTES_PER_PACKET. The resolved value is noted next to each one.
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)	// 60000
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)	// 600000
// DROP_THRESH lowered by 15 full-size packets: 577500
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
// Span between MARK_THRESH and LARGE_PKT_DROP_THRESH: 517500
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)	// 150000
// Credit assigned by hbm_init_vqueue(): 150000
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
48*4882a593Smuzhiyun 
// Time-based accounting for fq's EDT; every value is in nanoseconds.
#define BURST_SIZE_NS		100000 // 100us
#define MARK_THRESH_NS		50000 // 50us
#define DROP_THRESH_NS		500000 // 500us
// Reserve 20us of queuing for small packets (less than 120 bytes), so
// the large-packet drop threshold is 480us.
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
// Span between the mark and large-packet drop thresholds: 430us
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
56*4882a593Smuzhiyun 
// Fixed-point rate helpers: rate is in bytes per ns << 20.
// CREDIT_PER_NS: (delta * rate) >> 20, computed in 64 bits.
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
// BYTES_PER_NS performs exactly the same computation as CREDIT_PER_NS.
#define BYTES_PER_NS(delta, rate) CREDIT_PER_NS(delta, rate)
// BYTES_TO_NS: the inverse conversion, (bytes << 20) / rate.
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun struct {
63*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
64*4882a593Smuzhiyun 	__type(key, struct bpf_cgroup_storage_key);
65*4882a593Smuzhiyun 	__type(value, struct hbm_vqueue);
66*4882a593Smuzhiyun } queue_state SEC(".maps");
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun struct {
69*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_ARRAY);
70*4882a593Smuzhiyun 	__uint(max_entries, 1);
71*4882a593Smuzhiyun 	__type(key, u32);
72*4882a593Smuzhiyun 	__type(value, struct hvm_queue_stats);
73*4882a593Smuzhiyun } queue_stats SEC(".maps");
74*4882a593Smuzhiyun 
// Per-packet information gathered by hbm_get_pkt_info().
struct hbm_pkt_info {
	int	cwnd;		// tp->snd_cwnd of the TCP socket, else 0
	int	rtt;		// tp->srtt_us >> 3 of the TCP socket, else 0
	int	packets_out;	// tp->packets_out of the TCP socket
	bool	is_ip;		// header version field was 4 or 6
	bool	is_tcp;		// protocol / next-header field was 6
	short	ecn;		// ECN bits taken from the IP header
};
83*4882a593Smuzhiyun 
get_tcp_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)84*4882a593Smuzhiyun static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun 	struct bpf_sock *sk;
87*4882a593Smuzhiyun 	struct bpf_tcp_sock *tp;
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun 	sk = skb->sk;
90*4882a593Smuzhiyun 	if (sk) {
91*4882a593Smuzhiyun 		sk = bpf_sk_fullsock(sk);
92*4882a593Smuzhiyun 		if (sk) {
93*4882a593Smuzhiyun 			if (sk->protocol == IPPROTO_TCP) {
94*4882a593Smuzhiyun 				tp = bpf_tcp_sock(sk);
95*4882a593Smuzhiyun 				if (tp) {
96*4882a593Smuzhiyun 					pkti->cwnd = tp->snd_cwnd;
97*4882a593Smuzhiyun 					pkti->rtt = tp->srtt_us >> 3;
98*4882a593Smuzhiyun 					pkti->packets_out = tp->packets_out;
99*4882a593Smuzhiyun 					return 0;
100*4882a593Smuzhiyun 				}
101*4882a593Smuzhiyun 			}
102*4882a593Smuzhiyun 		}
103*4882a593Smuzhiyun 	}
104*4882a593Smuzhiyun 	pkti->cwnd = 0;
105*4882a593Smuzhiyun 	pkti->rtt = 0;
106*4882a593Smuzhiyun 	pkti->packets_out = 0;
107*4882a593Smuzhiyun 	return 1;
108*4882a593Smuzhiyun }
109*4882a593Smuzhiyun 
hbm_get_pkt_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)110*4882a593Smuzhiyun static void hbm_get_pkt_info(struct __sk_buff *skb,
111*4882a593Smuzhiyun 			     struct hbm_pkt_info *pkti)
112*4882a593Smuzhiyun {
113*4882a593Smuzhiyun 	struct iphdr iph;
114*4882a593Smuzhiyun 	struct ipv6hdr *ip6h;
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun 	pkti->cwnd = 0;
117*4882a593Smuzhiyun 	pkti->rtt = 0;
118*4882a593Smuzhiyun 	bpf_skb_load_bytes(skb, 0, &iph, 12);
119*4882a593Smuzhiyun 	if (iph.version == 6) {
120*4882a593Smuzhiyun 		ip6h = (struct ipv6hdr *)&iph;
121*4882a593Smuzhiyun 		pkti->is_ip = true;
122*4882a593Smuzhiyun 		pkti->is_tcp = (ip6h->nexthdr == 6);
123*4882a593Smuzhiyun 		pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
124*4882a593Smuzhiyun 	} else if (iph.version == 4) {
125*4882a593Smuzhiyun 		pkti->is_ip = true;
126*4882a593Smuzhiyun 		pkti->is_tcp = (iph.protocol == 6);
127*4882a593Smuzhiyun 		pkti->ecn = iph.tos & INET_ECN_MASK;
128*4882a593Smuzhiyun 	} else {
129*4882a593Smuzhiyun 		pkti->is_ip = false;
130*4882a593Smuzhiyun 		pkti->is_tcp = false;
131*4882a593Smuzhiyun 		pkti->ecn = 0;
132*4882a593Smuzhiyun 	}
133*4882a593Smuzhiyun 	if (pkti->is_tcp)
134*4882a593Smuzhiyun 		get_tcp_info(skb, pkti);
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun 
hbm_init_vqueue(struct hbm_vqueue * qdp,int rate)137*4882a593Smuzhiyun static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
138*4882a593Smuzhiyun {
139*4882a593Smuzhiyun 	bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
140*4882a593Smuzhiyun 	qdp->lasttime = bpf_ktime_get_ns();
141*4882a593Smuzhiyun 	qdp->credit = INIT_CREDIT;
142*4882a593Smuzhiyun 	qdp->rate = rate * 128;
143*4882a593Smuzhiyun }
144*4882a593Smuzhiyun 
hbm_init_edt_vqueue(struct hbm_vqueue * qdp,int rate)145*4882a593Smuzhiyun static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
146*4882a593Smuzhiyun 						int rate)
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun 	unsigned long long curtime;
149*4882a593Smuzhiyun 
150*4882a593Smuzhiyun 	curtime = bpf_ktime_get_ns();
151*4882a593Smuzhiyun 	bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
152*4882a593Smuzhiyun 	qdp->lasttime = curtime - BURST_SIZE_NS;	// support initial burst
153*4882a593Smuzhiyun 	qdp->credit = 0;				// not used
154*4882a593Smuzhiyun 	qdp->rate = rate * 128;
155*4882a593Smuzhiyun }
156*4882a593Smuzhiyun 
hbm_update_stats(struct hbm_queue_stats * qsp,int len,unsigned long long curtime,bool congestion_flag,bool drop_flag,bool cwr_flag,bool ecn_ce_flag,struct hbm_pkt_info * pkti,int credit)157*4882a593Smuzhiyun static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
158*4882a593Smuzhiyun 					     int len,
159*4882a593Smuzhiyun 					     unsigned long long curtime,
160*4882a593Smuzhiyun 					     bool congestion_flag,
161*4882a593Smuzhiyun 					     bool drop_flag,
162*4882a593Smuzhiyun 					     bool cwr_flag,
163*4882a593Smuzhiyun 					     bool ecn_ce_flag,
164*4882a593Smuzhiyun 					     struct hbm_pkt_info *pkti,
165*4882a593Smuzhiyun 					     int credit)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun 	int rv = ALLOW_PKT;
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 	if (qsp != NULL) {
170*4882a593Smuzhiyun 		// Following is needed for work conserving
171*4882a593Smuzhiyun 		__sync_add_and_fetch(&(qsp->bytes_total), len);
172*4882a593Smuzhiyun 		if (qsp->stats) {
173*4882a593Smuzhiyun 			// Optionally update statistics
174*4882a593Smuzhiyun 			if (qsp->firstPacketTime == 0)
175*4882a593Smuzhiyun 				qsp->firstPacketTime = curtime;
176*4882a593Smuzhiyun 			qsp->lastPacketTime = curtime;
177*4882a593Smuzhiyun 			__sync_add_and_fetch(&(qsp->pkts_total), 1);
178*4882a593Smuzhiyun 			if (congestion_flag) {
179*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->pkts_marked), 1);
180*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->bytes_marked), len);
181*4882a593Smuzhiyun 			}
182*4882a593Smuzhiyun 			if (drop_flag) {
183*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->pkts_dropped), 1);
184*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->bytes_dropped),
185*4882a593Smuzhiyun 						     len);
186*4882a593Smuzhiyun 			}
187*4882a593Smuzhiyun 			if (ecn_ce_flag)
188*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
189*4882a593Smuzhiyun 			if (pkti->cwnd) {
190*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->sum_cwnd),
191*4882a593Smuzhiyun 						     pkti->cwnd);
192*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
193*4882a593Smuzhiyun 			}
194*4882a593Smuzhiyun 			if (pkti->rtt)
195*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->sum_rtt),
196*4882a593Smuzhiyun 						     pkti->rtt);
197*4882a593Smuzhiyun 			__sync_add_and_fetch(&(qsp->sum_credit), credit);
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 			if (drop_flag)
200*4882a593Smuzhiyun 				rv = DROP_PKT;
201*4882a593Smuzhiyun 			if (cwr_flag)
202*4882a593Smuzhiyun 				rv |= 2;
203*4882a593Smuzhiyun 			if (rv == DROP_PKT)
204*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->returnValCount[0]),
205*4882a593Smuzhiyun 						     1);
206*4882a593Smuzhiyun 			else if (rv == ALLOW_PKT)
207*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->returnValCount[1]),
208*4882a593Smuzhiyun 						     1);
209*4882a593Smuzhiyun 			else if (rv == 2)
210*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->returnValCount[2]),
211*4882a593Smuzhiyun 						     1);
212*4882a593Smuzhiyun 			else if (rv == 3)
213*4882a593Smuzhiyun 				__sync_add_and_fetch(&(qsp->returnValCount[3]),
214*4882a593Smuzhiyun 						     1);
215*4882a593Smuzhiyun 		}
216*4882a593Smuzhiyun 	}
217*4882a593Smuzhiyun }
218