/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright (c) 2019 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Include file for sample Host Bandwidth Manager (HBM) BPF programs
 */
11*4882a593Smuzhiyun #define KBUILD_MODNAME "foo"
12*4882a593Smuzhiyun #include <stddef.h>
13*4882a593Smuzhiyun #include <stdbool.h>
14*4882a593Smuzhiyun #include <uapi/linux/bpf.h>
15*4882a593Smuzhiyun #include <uapi/linux/if_ether.h>
16*4882a593Smuzhiyun #include <uapi/linux/if_packet.h>
17*4882a593Smuzhiyun #include <uapi/linux/ip.h>
18*4882a593Smuzhiyun #include <uapi/linux/ipv6.h>
19*4882a593Smuzhiyun #include <uapi/linux/in.h>
20*4882a593Smuzhiyun #include <uapi/linux/tcp.h>
21*4882a593Smuzhiyun #include <uapi/linux/filter.h>
22*4882a593Smuzhiyun #include <uapi/linux/pkt_cls.h>
23*4882a593Smuzhiyun #include <net/ipv6.h>
24*4882a593Smuzhiyun #include <net/inet_ecn.h>
25*4882a593Smuzhiyun #include <bpf/bpf_endian.h>
26*4882a593Smuzhiyun #include <bpf/bpf_helpers.h>
27*4882a593Smuzhiyun #include "hbm.h"
28*4882a593Smuzhiyun
/* Packet verdict values; CWR is a flag bit that is OR-ed into a verdict. */
#define DROP_PKT	0
#define ALLOW_PKT	1
#define TCP_ECN_OK	1
#define CWR		2

/* Unless HBM_DEBUG is defined, bpf_printk() expands to nothing. */
#ifndef HBM_DEBUG  // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif

/*
 * Byte-based (credit) accounting.  All thresholds are expressed as a
 * multiple of MAX_BYTES_PER_PACKET.
 */
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
/* Large packets hit their drop threshold 15 full packets before DROP_THRESH */
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
/* Width of the region between MARK_THRESH and LARGE_PKT_DROP_THRESH */
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
/* Size in bytes separating "small" from "large" packets (see 20us note below) */
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)

// Time base accounting for fq's EDT
#define BURST_SIZE_NS		100000 // 100us
#define MARK_THRESH_NS		50000 // 50us
#define DROP_THRESH_NS		500000 // 500us
// Reserve 20us of queuing for small packets (less than 120 bytes)
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)

/*
 * Fixed-point conversions.  "rate" is in bytes per ns << 20, so a product
 * with a time delta is shifted right by 20 to get bytes, and a byte count
 * is shifted left by 20 before dividing by the rate to get ns.
 * CREDIT_PER_NS and BYTES_PER_NS have the same expansion.
 */
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun struct {
63*4882a593Smuzhiyun __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
64*4882a593Smuzhiyun __type(key, struct bpf_cgroup_storage_key);
65*4882a593Smuzhiyun __type(value, struct hbm_vqueue);
66*4882a593Smuzhiyun } queue_state SEC(".maps");
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun struct {
69*4882a593Smuzhiyun __uint(type, BPF_MAP_TYPE_ARRAY);
70*4882a593Smuzhiyun __uint(max_entries, 1);
71*4882a593Smuzhiyun __type(key, u32);
72*4882a593Smuzhiyun __type(value, struct hvm_queue_stats);
73*4882a593Smuzhiyun } queue_stats SEC(".maps");
74*4882a593Smuzhiyun
/*
 * Per-packet information gathered by hbm_get_pkt_info().
 * The three TCP fields are copied from the socket's bpf_tcp_sock by
 * get_tcp_info(), which sets them to 0 when no TCP socket is available.
 */
struct hbm_pkt_info {
	int	cwnd;		/* tp->snd_cwnd */
	int	rtt;		/* tp->srtt_us >> 3 */
	int	packets_out;	/* tp->packets_out */
	bool	is_ip;		/* IP version field is 4 or 6 */
	bool	is_tcp;		/* IPv4 protocol / IPv6 next header is 6 */
	short	ecn;		/* header ECN bits, masked with INET_ECN_MASK */
};
83*4882a593Smuzhiyun
get_tcp_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)84*4882a593Smuzhiyun static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun struct bpf_sock *sk;
87*4882a593Smuzhiyun struct bpf_tcp_sock *tp;
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun sk = skb->sk;
90*4882a593Smuzhiyun if (sk) {
91*4882a593Smuzhiyun sk = bpf_sk_fullsock(sk);
92*4882a593Smuzhiyun if (sk) {
93*4882a593Smuzhiyun if (sk->protocol == IPPROTO_TCP) {
94*4882a593Smuzhiyun tp = bpf_tcp_sock(sk);
95*4882a593Smuzhiyun if (tp) {
96*4882a593Smuzhiyun pkti->cwnd = tp->snd_cwnd;
97*4882a593Smuzhiyun pkti->rtt = tp->srtt_us >> 3;
98*4882a593Smuzhiyun pkti->packets_out = tp->packets_out;
99*4882a593Smuzhiyun return 0;
100*4882a593Smuzhiyun }
101*4882a593Smuzhiyun }
102*4882a593Smuzhiyun }
103*4882a593Smuzhiyun }
104*4882a593Smuzhiyun pkti->cwnd = 0;
105*4882a593Smuzhiyun pkti->rtt = 0;
106*4882a593Smuzhiyun pkti->packets_out = 0;
107*4882a593Smuzhiyun return 1;
108*4882a593Smuzhiyun }
109*4882a593Smuzhiyun
hbm_get_pkt_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)110*4882a593Smuzhiyun static void hbm_get_pkt_info(struct __sk_buff *skb,
111*4882a593Smuzhiyun struct hbm_pkt_info *pkti)
112*4882a593Smuzhiyun {
113*4882a593Smuzhiyun struct iphdr iph;
114*4882a593Smuzhiyun struct ipv6hdr *ip6h;
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun pkti->cwnd = 0;
117*4882a593Smuzhiyun pkti->rtt = 0;
118*4882a593Smuzhiyun bpf_skb_load_bytes(skb, 0, &iph, 12);
119*4882a593Smuzhiyun if (iph.version == 6) {
120*4882a593Smuzhiyun ip6h = (struct ipv6hdr *)&iph;
121*4882a593Smuzhiyun pkti->is_ip = true;
122*4882a593Smuzhiyun pkti->is_tcp = (ip6h->nexthdr == 6);
123*4882a593Smuzhiyun pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
124*4882a593Smuzhiyun } else if (iph.version == 4) {
125*4882a593Smuzhiyun pkti->is_ip = true;
126*4882a593Smuzhiyun pkti->is_tcp = (iph.protocol == 6);
127*4882a593Smuzhiyun pkti->ecn = iph.tos & INET_ECN_MASK;
128*4882a593Smuzhiyun } else {
129*4882a593Smuzhiyun pkti->is_ip = false;
130*4882a593Smuzhiyun pkti->is_tcp = false;
131*4882a593Smuzhiyun pkti->ecn = 0;
132*4882a593Smuzhiyun }
133*4882a593Smuzhiyun if (pkti->is_tcp)
134*4882a593Smuzhiyun get_tcp_info(skb, pkti);
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun
hbm_init_vqueue(struct hbm_vqueue * qdp,int rate)137*4882a593Smuzhiyun static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
138*4882a593Smuzhiyun {
139*4882a593Smuzhiyun bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
140*4882a593Smuzhiyun qdp->lasttime = bpf_ktime_get_ns();
141*4882a593Smuzhiyun qdp->credit = INIT_CREDIT;
142*4882a593Smuzhiyun qdp->rate = rate * 128;
143*4882a593Smuzhiyun }
144*4882a593Smuzhiyun
hbm_init_edt_vqueue(struct hbm_vqueue * qdp,int rate)145*4882a593Smuzhiyun static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
146*4882a593Smuzhiyun int rate)
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun unsigned long long curtime;
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun curtime = bpf_ktime_get_ns();
151*4882a593Smuzhiyun bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
152*4882a593Smuzhiyun qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst
153*4882a593Smuzhiyun qdp->credit = 0; // not used
154*4882a593Smuzhiyun qdp->rate = rate * 128;
155*4882a593Smuzhiyun }
156*4882a593Smuzhiyun
hbm_update_stats(struct hbm_queue_stats * qsp,int len,unsigned long long curtime,bool congestion_flag,bool drop_flag,bool cwr_flag,bool ecn_ce_flag,struct hbm_pkt_info * pkti,int credit)157*4882a593Smuzhiyun static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
158*4882a593Smuzhiyun int len,
159*4882a593Smuzhiyun unsigned long long curtime,
160*4882a593Smuzhiyun bool congestion_flag,
161*4882a593Smuzhiyun bool drop_flag,
162*4882a593Smuzhiyun bool cwr_flag,
163*4882a593Smuzhiyun bool ecn_ce_flag,
164*4882a593Smuzhiyun struct hbm_pkt_info *pkti,
165*4882a593Smuzhiyun int credit)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun int rv = ALLOW_PKT;
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun if (qsp != NULL) {
170*4882a593Smuzhiyun // Following is needed for work conserving
171*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->bytes_total), len);
172*4882a593Smuzhiyun if (qsp->stats) {
173*4882a593Smuzhiyun // Optionally update statistics
174*4882a593Smuzhiyun if (qsp->firstPacketTime == 0)
175*4882a593Smuzhiyun qsp->firstPacketTime = curtime;
176*4882a593Smuzhiyun qsp->lastPacketTime = curtime;
177*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->pkts_total), 1);
178*4882a593Smuzhiyun if (congestion_flag) {
179*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->pkts_marked), 1);
180*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->bytes_marked), len);
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun if (drop_flag) {
183*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->pkts_dropped), 1);
184*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->bytes_dropped),
185*4882a593Smuzhiyun len);
186*4882a593Smuzhiyun }
187*4882a593Smuzhiyun if (ecn_ce_flag)
188*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
189*4882a593Smuzhiyun if (pkti->cwnd) {
190*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->sum_cwnd),
191*4882a593Smuzhiyun pkti->cwnd);
192*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
193*4882a593Smuzhiyun }
194*4882a593Smuzhiyun if (pkti->rtt)
195*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->sum_rtt),
196*4882a593Smuzhiyun pkti->rtt);
197*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->sum_credit), credit);
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun if (drop_flag)
200*4882a593Smuzhiyun rv = DROP_PKT;
201*4882a593Smuzhiyun if (cwr_flag)
202*4882a593Smuzhiyun rv |= 2;
203*4882a593Smuzhiyun if (rv == DROP_PKT)
204*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->returnValCount[0]),
205*4882a593Smuzhiyun 1);
206*4882a593Smuzhiyun else if (rv == ALLOW_PKT)
207*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->returnValCount[1]),
208*4882a593Smuzhiyun 1);
209*4882a593Smuzhiyun else if (rv == 2)
210*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->returnValCount[2]),
211*4882a593Smuzhiyun 1);
212*4882a593Smuzhiyun else if (rv == 3)
213*4882a593Smuzhiyun __sync_add_and_fetch(&(qsp->returnValCount[3]),
214*4882a593Smuzhiyun 1);
215*4882a593Smuzhiyun }
216*4882a593Smuzhiyun }
217*4882a593Smuzhiyun }
218