1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /* Copyright (c) 2019 Facebook
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * This program is free software; you can redistribute it and/or
5*4882a593Smuzhiyun * modify it under the terms of version 2 of the GNU General Public
6*4882a593Smuzhiyun * License as published by the Free Software Foundation.
7*4882a593Smuzhiyun *
 * Example program for Host Bandwidth Management
9*4882a593Smuzhiyun *
10*4882a593Smuzhiyun * This program loads a cgroup skb BPF program to enforce cgroup output
11*4882a593Smuzhiyun * (egress) or input (ingress) bandwidth limits.
12*4882a593Smuzhiyun *
 * USAGE: hbm [-d] [-l] [-n <id>] [--no_cn] [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog]
14*4882a593Smuzhiyun * Where:
15*4882a593Smuzhiyun * -d Print BPF trace debug buffer
16*4882a593Smuzhiyun * -l Also limit flows doing loopback
17*4882a593Smuzhiyun * -n <#> To create cgroup \"/hbm#\" and attach prog
18*4882a593Smuzhiyun * Default is /hbm1
19*4882a593Smuzhiyun * --no_cn Do not return cn notifications
20*4882a593Smuzhiyun * -r <rate> Rate limit in Mbps
21*4882a593Smuzhiyun * -s Get HBM stats (marked, dropped, etc.)
 * -t <time> Exit after specified seconds (default is 1)
 * -w Work conserving flag. cgroup can increase its bandwidth
 * beyond the rate limit specified while there is available
 * bandwidth. Current implementation assumes there is only
 * one NIC (eth0), but can be extended to support multiple NICs.
 * Currently only supported for egress.
28*4882a593Smuzhiyun * -h Print this info
29*4882a593Smuzhiyun * prog BPF program file name. Name defaults to hbm_out_kern.o
30*4882a593Smuzhiyun */
31*4882a593Smuzhiyun
32*4882a593Smuzhiyun #define _GNU_SOURCE
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun #include <stdio.h>
35*4882a593Smuzhiyun #include <stdlib.h>
36*4882a593Smuzhiyun #include <assert.h>
37*4882a593Smuzhiyun #include <sys/resource.h>
38*4882a593Smuzhiyun #include <sys/time.h>
39*4882a593Smuzhiyun #include <unistd.h>
40*4882a593Smuzhiyun #include <errno.h>
41*4882a593Smuzhiyun #include <fcntl.h>
42*4882a593Smuzhiyun #include <linux/unistd.h>
43*4882a593Smuzhiyun #include <linux/compiler.h>
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun #include <linux/bpf.h>
46*4882a593Smuzhiyun #include <bpf/bpf.h>
47*4882a593Smuzhiyun #include <getopt.h>
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun #include "bpf_load.h"
50*4882a593Smuzhiyun #include "bpf_rlimit.h"
51*4882a593Smuzhiyun #include "cgroup_helpers.h"
52*4882a593Smuzhiyun #include "hbm.h"
53*4882a593Smuzhiyun #include "bpf_util.h"
54*4882a593Smuzhiyun #include <bpf/bpf.h>
55*4882a593Smuzhiyun #include <bpf/libbpf.h>
56*4882a593Smuzhiyun
/*
 * Run-time configuration. The values below are the defaults; main() overrides
 * them from the command line.
 */

/* true: attach as egress; false: run_bpf_prog() attaches as ingress instead */
bool outFlag = true;
int minRate = 1000;		/* cgroup rate limit in Mbps */
int rate = 1000;		/* can grow if rate conserving is enabled */
int dur = 1;			/* run time in seconds (-t) */
bool stats_flag;		/* -s: collect stats and write them to a file */
bool loopback_flag;		/* -l: also limit flows using loopback */
bool debugFlag;			/* -d: dump the BPF trace pipe after the run */
bool work_conserving_flag;	/* -w: adapt the rate to eth0 utilisation */
bool no_cn_flag;		/* --no_cn: copied into qstats.no_cn */
bool edt_flag;			/* --edt: selects hbm_edt_kern.o and the credit_ms report */

static void Usage(void);
static void read_trace_pipe2(void);
static void do_error(char *msg, bool errno_flag);

/* Directory containing the trace_pipe file read by read_trace_pipe2() */
#define DEBUGFS "/sys/kernel/debug/tracing/"

/* Filled in by prog_load() through bpf_prog_load_xattr() */
struct bpf_object *obj;
int bpfprog_fd;
/* Not referenced anywhere in the code visible in this file */
int cgroup_storage_fd;
77*4882a593Smuzhiyun
read_trace_pipe2(void)78*4882a593Smuzhiyun static void read_trace_pipe2(void)
79*4882a593Smuzhiyun {
80*4882a593Smuzhiyun int trace_fd;
81*4882a593Smuzhiyun FILE *outf;
82*4882a593Smuzhiyun char *outFname = "hbm_out.log";
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
85*4882a593Smuzhiyun if (trace_fd < 0) {
86*4882a593Smuzhiyun printf("Error opening trace_pipe\n");
87*4882a593Smuzhiyun return;
88*4882a593Smuzhiyun }
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun // Future support of ingress
91*4882a593Smuzhiyun // if (!outFlag)
92*4882a593Smuzhiyun // outFname = "hbm_in.log";
93*4882a593Smuzhiyun outf = fopen(outFname, "w");
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun if (outf == NULL)
96*4882a593Smuzhiyun printf("Error creating %s\n", outFname);
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun while (1) {
99*4882a593Smuzhiyun static char buf[4097];
100*4882a593Smuzhiyun ssize_t sz;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun sz = read(trace_fd, buf, sizeof(buf) - 1);
103*4882a593Smuzhiyun if (sz > 0) {
104*4882a593Smuzhiyun buf[sz] = 0;
105*4882a593Smuzhiyun puts(buf);
106*4882a593Smuzhiyun if (outf != NULL) {
107*4882a593Smuzhiyun fprintf(outf, "%s\n", buf);
108*4882a593Smuzhiyun fflush(outf);
109*4882a593Smuzhiyun }
110*4882a593Smuzhiyun }
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun }
113*4882a593Smuzhiyun
/*
 * Print a fatal error message on stdout and terminate the process with
 * exit status 1.
 *
 * @msg:        text printed after the "ERROR: " prefix
 * @errno_flag: when true, the current numeric errno value is appended
 *
 * This function does not return.
 */
static void do_error(char *msg, bool errno_flag)
{
	if (!errno_flag)
		printf("ERROR: %s\n", msg);
	else
		printf("ERROR: %s, errno: %d\n", msg, errno);

	exit(1);
}
122*4882a593Smuzhiyun
prog_load(char * prog)123*4882a593Smuzhiyun static int prog_load(char *prog)
124*4882a593Smuzhiyun {
125*4882a593Smuzhiyun struct bpf_prog_load_attr prog_load_attr = {
126*4882a593Smuzhiyun .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
127*4882a593Smuzhiyun .file = prog,
128*4882a593Smuzhiyun .expected_attach_type = BPF_CGROUP_INET_EGRESS,
129*4882a593Smuzhiyun };
130*4882a593Smuzhiyun int map_fd;
131*4882a593Smuzhiyun struct bpf_map *map;
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun int ret = 0;
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun if (access(prog, O_RDONLY) < 0) {
136*4882a593Smuzhiyun printf("Error accessing file %s: %s\n", prog, strerror(errno));
137*4882a593Smuzhiyun return 1;
138*4882a593Smuzhiyun }
139*4882a593Smuzhiyun if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd))
140*4882a593Smuzhiyun ret = 1;
141*4882a593Smuzhiyun if (!ret) {
142*4882a593Smuzhiyun map = bpf_object__find_map_by_name(obj, "queue_stats");
143*4882a593Smuzhiyun map_fd = bpf_map__fd(map);
144*4882a593Smuzhiyun if (map_fd < 0) {
145*4882a593Smuzhiyun printf("Map not found: %s\n", strerror(map_fd));
146*4882a593Smuzhiyun ret = 1;
147*4882a593Smuzhiyun }
148*4882a593Smuzhiyun }
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun if (ret) {
151*4882a593Smuzhiyun printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog);
152*4882a593Smuzhiyun printf(" Output from verifier:\n%s\n------\n", bpf_log_buf);
153*4882a593Smuzhiyun ret = -1;
154*4882a593Smuzhiyun } else {
155*4882a593Smuzhiyun ret = map_fd;
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun return ret;
159*4882a593Smuzhiyun }
160*4882a593Smuzhiyun
run_bpf_prog(char * prog,int cg_id)161*4882a593Smuzhiyun static int run_bpf_prog(char *prog, int cg_id)
162*4882a593Smuzhiyun {
163*4882a593Smuzhiyun int map_fd;
164*4882a593Smuzhiyun int rc = 0;
165*4882a593Smuzhiyun int key = 0;
166*4882a593Smuzhiyun int cg1 = 0;
167*4882a593Smuzhiyun int type = BPF_CGROUP_INET_EGRESS;
168*4882a593Smuzhiyun char cg_dir[100];
169*4882a593Smuzhiyun struct hbm_queue_stats qstats = {0};
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun sprintf(cg_dir, "/hbm%d", cg_id);
172*4882a593Smuzhiyun map_fd = prog_load(prog);
173*4882a593Smuzhiyun if (map_fd == -1)
174*4882a593Smuzhiyun return 1;
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun if (setup_cgroup_environment()) {
177*4882a593Smuzhiyun printf("ERROR: setting cgroup environment\n");
178*4882a593Smuzhiyun goto err;
179*4882a593Smuzhiyun }
180*4882a593Smuzhiyun cg1 = create_and_get_cgroup(cg_dir);
181*4882a593Smuzhiyun if (!cg1) {
182*4882a593Smuzhiyun printf("ERROR: create_and_get_cgroup\n");
183*4882a593Smuzhiyun goto err;
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun if (join_cgroup(cg_dir)) {
186*4882a593Smuzhiyun printf("ERROR: join_cgroup\n");
187*4882a593Smuzhiyun goto err;
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun qstats.rate = rate;
191*4882a593Smuzhiyun qstats.stats = stats_flag ? 1 : 0;
192*4882a593Smuzhiyun qstats.loopback = loopback_flag ? 1 : 0;
193*4882a593Smuzhiyun qstats.no_cn = no_cn_flag ? 1 : 0;
194*4882a593Smuzhiyun if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) {
195*4882a593Smuzhiyun printf("ERROR: Could not update map element\n");
196*4882a593Smuzhiyun goto err;
197*4882a593Smuzhiyun }
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun if (!outFlag)
200*4882a593Smuzhiyun type = BPF_CGROUP_INET_INGRESS;
201*4882a593Smuzhiyun if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) {
202*4882a593Smuzhiyun printf("ERROR: bpf_prog_attach fails!\n");
203*4882a593Smuzhiyun log_err("Attaching prog");
204*4882a593Smuzhiyun goto err;
205*4882a593Smuzhiyun }
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun if (work_conserving_flag) {
208*4882a593Smuzhiyun struct timeval t0, t_last, t_new;
209*4882a593Smuzhiyun FILE *fin;
210*4882a593Smuzhiyun unsigned long long last_eth_tx_bytes, new_eth_tx_bytes;
211*4882a593Smuzhiyun signed long long last_cg_tx_bytes, new_cg_tx_bytes;
212*4882a593Smuzhiyun signed long long delta_time, delta_bytes, delta_rate;
213*4882a593Smuzhiyun int delta_ms;
214*4882a593Smuzhiyun #define DELTA_RATE_CHECK 10000 /* in us */
215*4882a593Smuzhiyun #define RATE_THRESHOLD 9500000000 /* 9.5 Gbps */
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun bpf_map_lookup_elem(map_fd, &key, &qstats);
218*4882a593Smuzhiyun if (gettimeofday(&t0, NULL) < 0)
219*4882a593Smuzhiyun do_error("gettimeofday failed", true);
220*4882a593Smuzhiyun t_last = t0;
221*4882a593Smuzhiyun fin = fopen("/sys/class/net/eth0/statistics/tx_bytes", "r");
222*4882a593Smuzhiyun if (fscanf(fin, "%llu", &last_eth_tx_bytes) != 1)
223*4882a593Smuzhiyun do_error("fscanf fails", false);
224*4882a593Smuzhiyun fclose(fin);
225*4882a593Smuzhiyun last_cg_tx_bytes = qstats.bytes_total;
226*4882a593Smuzhiyun while (true) {
227*4882a593Smuzhiyun usleep(DELTA_RATE_CHECK);
228*4882a593Smuzhiyun if (gettimeofday(&t_new, NULL) < 0)
229*4882a593Smuzhiyun do_error("gettimeofday failed", true);
230*4882a593Smuzhiyun delta_ms = (t_new.tv_sec - t0.tv_sec) * 1000 +
231*4882a593Smuzhiyun (t_new.tv_usec - t0.tv_usec)/1000;
232*4882a593Smuzhiyun if (delta_ms > dur * 1000)
233*4882a593Smuzhiyun break;
234*4882a593Smuzhiyun delta_time = (t_new.tv_sec - t_last.tv_sec) * 1000000 +
235*4882a593Smuzhiyun (t_new.tv_usec - t_last.tv_usec);
236*4882a593Smuzhiyun if (delta_time == 0)
237*4882a593Smuzhiyun continue;
238*4882a593Smuzhiyun t_last = t_new;
239*4882a593Smuzhiyun fin = fopen("/sys/class/net/eth0/statistics/tx_bytes",
240*4882a593Smuzhiyun "r");
241*4882a593Smuzhiyun if (fscanf(fin, "%llu", &new_eth_tx_bytes) != 1)
242*4882a593Smuzhiyun do_error("fscanf fails", false);
243*4882a593Smuzhiyun fclose(fin);
244*4882a593Smuzhiyun printf(" new_eth_tx_bytes:%llu\n",
245*4882a593Smuzhiyun new_eth_tx_bytes);
246*4882a593Smuzhiyun bpf_map_lookup_elem(map_fd, &key, &qstats);
247*4882a593Smuzhiyun new_cg_tx_bytes = qstats.bytes_total;
248*4882a593Smuzhiyun delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
249*4882a593Smuzhiyun last_eth_tx_bytes = new_eth_tx_bytes;
250*4882a593Smuzhiyun delta_rate = (delta_bytes * 8000000) / delta_time;
251*4882a593Smuzhiyun printf("%5d - eth_rate:%.1fGbps cg_rate:%.3fGbps",
252*4882a593Smuzhiyun delta_ms, delta_rate/1000000000.0,
253*4882a593Smuzhiyun rate/1000.0);
254*4882a593Smuzhiyun if (delta_rate < RATE_THRESHOLD) {
255*4882a593Smuzhiyun /* can increase cgroup rate limit, but first
256*4882a593Smuzhiyun * check if we are using the current limit.
257*4882a593Smuzhiyun * Currently increasing by 6.25%, unknown
258*4882a593Smuzhiyun * if that is the optimal rate.
259*4882a593Smuzhiyun */
260*4882a593Smuzhiyun int rate_diff100;
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun delta_bytes = new_cg_tx_bytes -
263*4882a593Smuzhiyun last_cg_tx_bytes;
264*4882a593Smuzhiyun last_cg_tx_bytes = new_cg_tx_bytes;
265*4882a593Smuzhiyun delta_rate = (delta_bytes * 8000000) /
266*4882a593Smuzhiyun delta_time;
267*4882a593Smuzhiyun printf(" rate:%.3fGbps",
268*4882a593Smuzhiyun delta_rate/1000000000.0);
269*4882a593Smuzhiyun rate_diff100 = (((long long)rate)*1000000 -
270*4882a593Smuzhiyun delta_rate) * 100 /
271*4882a593Smuzhiyun (((long long) rate) * 1000000);
272*4882a593Smuzhiyun printf(" rdiff:%d", rate_diff100);
273*4882a593Smuzhiyun if (rate_diff100 <= 3) {
274*4882a593Smuzhiyun rate += (rate >> 4);
275*4882a593Smuzhiyun if (rate > RATE_THRESHOLD / 1000000)
276*4882a593Smuzhiyun rate = RATE_THRESHOLD / 1000000;
277*4882a593Smuzhiyun qstats.rate = rate;
278*4882a593Smuzhiyun printf(" INC\n");
279*4882a593Smuzhiyun } else {
280*4882a593Smuzhiyun printf("\n");
281*4882a593Smuzhiyun }
282*4882a593Smuzhiyun } else {
283*4882a593Smuzhiyun /* Need to decrease cgroup rate limit.
284*4882a593Smuzhiyun * Currently decreasing by 12.5%, unknown
285*4882a593Smuzhiyun * if that is optimal
286*4882a593Smuzhiyun */
287*4882a593Smuzhiyun printf(" DEC\n");
288*4882a593Smuzhiyun rate -= (rate >> 3);
289*4882a593Smuzhiyun if (rate < minRate)
290*4882a593Smuzhiyun rate = minRate;
291*4882a593Smuzhiyun qstats.rate = rate;
292*4882a593Smuzhiyun }
293*4882a593Smuzhiyun if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY))
294*4882a593Smuzhiyun do_error("update map element fails", false);
295*4882a593Smuzhiyun }
296*4882a593Smuzhiyun } else {
297*4882a593Smuzhiyun sleep(dur);
298*4882a593Smuzhiyun }
299*4882a593Smuzhiyun // Get stats!
300*4882a593Smuzhiyun if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) {
301*4882a593Smuzhiyun char fname[100];
302*4882a593Smuzhiyun FILE *fout;
303*4882a593Smuzhiyun
304*4882a593Smuzhiyun if (!outFlag)
305*4882a593Smuzhiyun sprintf(fname, "hbm.%d.in", cg_id);
306*4882a593Smuzhiyun else
307*4882a593Smuzhiyun sprintf(fname, "hbm.%d.out", cg_id);
308*4882a593Smuzhiyun fout = fopen(fname, "w");
309*4882a593Smuzhiyun fprintf(fout, "id:%d\n", cg_id);
310*4882a593Smuzhiyun fprintf(fout, "ERROR: Could not lookup queue_stats\n");
311*4882a593Smuzhiyun } else if (stats_flag && qstats.lastPacketTime >
312*4882a593Smuzhiyun qstats.firstPacketTime) {
313*4882a593Smuzhiyun long long delta_us = (qstats.lastPacketTime -
314*4882a593Smuzhiyun qstats.firstPacketTime)/1000;
315*4882a593Smuzhiyun unsigned int rate_mbps = ((qstats.bytes_total -
316*4882a593Smuzhiyun qstats.bytes_dropped) * 8 /
317*4882a593Smuzhiyun delta_us);
318*4882a593Smuzhiyun double percent_pkts, percent_bytes;
319*4882a593Smuzhiyun char fname[100];
320*4882a593Smuzhiyun FILE *fout;
321*4882a593Smuzhiyun int k;
322*4882a593Smuzhiyun static const char *returnValNames[] = {
323*4882a593Smuzhiyun "DROP_PKT",
324*4882a593Smuzhiyun "ALLOW_PKT",
325*4882a593Smuzhiyun "DROP_PKT_CWR",
326*4882a593Smuzhiyun "ALLOW_PKT_CWR"
327*4882a593Smuzhiyun };
328*4882a593Smuzhiyun #define RET_VAL_COUNT 4
329*4882a593Smuzhiyun
330*4882a593Smuzhiyun // Future support of ingress
331*4882a593Smuzhiyun // if (!outFlag)
332*4882a593Smuzhiyun // sprintf(fname, "hbm.%d.in", cg_id);
333*4882a593Smuzhiyun // else
334*4882a593Smuzhiyun sprintf(fname, "hbm.%d.out", cg_id);
335*4882a593Smuzhiyun fout = fopen(fname, "w");
336*4882a593Smuzhiyun fprintf(fout, "id:%d\n", cg_id);
337*4882a593Smuzhiyun fprintf(fout, "rate_mbps:%d\n", rate_mbps);
338*4882a593Smuzhiyun fprintf(fout, "duration:%.1f secs\n",
339*4882a593Smuzhiyun (qstats.lastPacketTime - qstats.firstPacketTime) /
340*4882a593Smuzhiyun 1000000000.0);
341*4882a593Smuzhiyun fprintf(fout, "packets:%d\n", (int)qstats.pkts_total);
342*4882a593Smuzhiyun fprintf(fout, "bytes_MB:%d\n", (int)(qstats.bytes_total /
343*4882a593Smuzhiyun 1000000));
344*4882a593Smuzhiyun fprintf(fout, "pkts_dropped:%d\n", (int)qstats.pkts_dropped);
345*4882a593Smuzhiyun fprintf(fout, "bytes_dropped_MB:%d\n",
346*4882a593Smuzhiyun (int)(qstats.bytes_dropped /
347*4882a593Smuzhiyun 1000000));
348*4882a593Smuzhiyun // Marked Pkts and Bytes
349*4882a593Smuzhiyun percent_pkts = (qstats.pkts_marked * 100.0) /
350*4882a593Smuzhiyun (qstats.pkts_total + 1);
351*4882a593Smuzhiyun percent_bytes = (qstats.bytes_marked * 100.0) /
352*4882a593Smuzhiyun (qstats.bytes_total + 1);
353*4882a593Smuzhiyun fprintf(fout, "pkts_marked_percent:%6.2f\n", percent_pkts);
354*4882a593Smuzhiyun fprintf(fout, "bytes_marked_percent:%6.2f\n", percent_bytes);
355*4882a593Smuzhiyun
356*4882a593Smuzhiyun // Dropped Pkts and Bytes
357*4882a593Smuzhiyun percent_pkts = (qstats.pkts_dropped * 100.0) /
358*4882a593Smuzhiyun (qstats.pkts_total + 1);
359*4882a593Smuzhiyun percent_bytes = (qstats.bytes_dropped * 100.0) /
360*4882a593Smuzhiyun (qstats.bytes_total + 1);
361*4882a593Smuzhiyun fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts);
362*4882a593Smuzhiyun fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes);
363*4882a593Smuzhiyun
364*4882a593Smuzhiyun // ECN CE markings
365*4882a593Smuzhiyun percent_pkts = (qstats.pkts_ecn_ce * 100.0) /
366*4882a593Smuzhiyun (qstats.pkts_total + 1);
367*4882a593Smuzhiyun fprintf(fout, "pkts_ecn_ce:%6.2f (%d)\n", percent_pkts,
368*4882a593Smuzhiyun (int)qstats.pkts_ecn_ce);
369*4882a593Smuzhiyun
370*4882a593Smuzhiyun // Average cwnd
371*4882a593Smuzhiyun fprintf(fout, "avg cwnd:%d\n",
372*4882a593Smuzhiyun (int)(qstats.sum_cwnd / (qstats.sum_cwnd_cnt + 1)));
373*4882a593Smuzhiyun // Average rtt
374*4882a593Smuzhiyun fprintf(fout, "avg rtt:%d\n",
375*4882a593Smuzhiyun (int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
376*4882a593Smuzhiyun // Average credit
377*4882a593Smuzhiyun if (edt_flag)
378*4882a593Smuzhiyun fprintf(fout, "avg credit_ms:%.03f\n",
379*4882a593Smuzhiyun (qstats.sum_credit /
380*4882a593Smuzhiyun (qstats.pkts_total + 1.0)) / 1000000.0);
381*4882a593Smuzhiyun else
382*4882a593Smuzhiyun fprintf(fout, "avg credit:%d\n",
383*4882a593Smuzhiyun (int)(qstats.sum_credit /
384*4882a593Smuzhiyun (1500 * ((int)qstats.pkts_total ) + 1)));
385*4882a593Smuzhiyun
386*4882a593Smuzhiyun // Return values stats
387*4882a593Smuzhiyun for (k = 0; k < RET_VAL_COUNT; k++) {
388*4882a593Smuzhiyun percent_pkts = (qstats.returnValCount[k] * 100.0) /
389*4882a593Smuzhiyun (qstats.pkts_total + 1);
390*4882a593Smuzhiyun fprintf(fout, "%s:%6.2f (%d)\n", returnValNames[k],
391*4882a593Smuzhiyun percent_pkts, (int)qstats.returnValCount[k]);
392*4882a593Smuzhiyun }
393*4882a593Smuzhiyun fclose(fout);
394*4882a593Smuzhiyun }
395*4882a593Smuzhiyun
396*4882a593Smuzhiyun if (debugFlag)
397*4882a593Smuzhiyun read_trace_pipe2();
398*4882a593Smuzhiyun return rc;
399*4882a593Smuzhiyun err:
400*4882a593Smuzhiyun rc = 1;
401*4882a593Smuzhiyun
402*4882a593Smuzhiyun if (cg1)
403*4882a593Smuzhiyun close(cg1);
404*4882a593Smuzhiyun cleanup_cgroup_environment();
405*4882a593Smuzhiyun
406*4882a593Smuzhiyun return rc;
407*4882a593Smuzhiyun }
408*4882a593Smuzhiyun
/*
 * Print the command-line help text on stdout.
 *
 * The text must stay in step with the option table in main() and with the
 * defaults of the file-scope configuration variables (dur starts at 1).
 */
static void Usage(void)
{
	printf("This program loads a cgroup skb BPF program to enforce\n"
	       "cgroup output (egress) bandwidth limits.\n\n"
	       "USAGE: hbm [-o] [-d] [--edt] [-l] [-n <id>] [--no_cn] [-r <rate>]\n"
	       " [-s] [-t <secs>] [-w] [-h] [prog]\n"
	       " Where:\n"
	       " -o indicates egress direction (default)\n"
	       " -d print BPF trace debug buffer\n"
	       " --edt use fq's Earliest Departure Time\n"
	       " -l also limit flows using loopback\n"
	       " -n <#> to create cgroup \"/hbm#\" and attach prog\n"
	       " Default is /hbm1\n"
	       " --no_cn disable CN notifications\n"
	       " -r <rate> Rate in Mbps\n"
	       " -s Update HBM stats\n"
	       " -t <time> Exit after specified seconds (default is 1)\n"
	       " -w Work conserving flag. cgroup can increase\n"
	       " bandwidth beyond the rate limit specified\n"
	       " while there is available bandwidth. Current\n"
	       " implementation assumes there is only eth0\n"
	       " but can be extended to support multiple NICs\n"
	       " -h print this info\n"
	       " prog BPF program file name. Name defaults to\n"
	       " hbm_out_kern.o\n");
}
435*4882a593Smuzhiyun
main(int argc,char ** argv)436*4882a593Smuzhiyun int main(int argc, char **argv)
437*4882a593Smuzhiyun {
438*4882a593Smuzhiyun char *prog = "hbm_out_kern.o";
439*4882a593Smuzhiyun int k;
440*4882a593Smuzhiyun int cg_id = 1;
441*4882a593Smuzhiyun char *optstring = "iodln:r:st:wh";
442*4882a593Smuzhiyun struct option loptions[] = {
443*4882a593Smuzhiyun {"no_cn", 0, NULL, 1},
444*4882a593Smuzhiyun {"edt", 0, NULL, 2},
445*4882a593Smuzhiyun {NULL, 0, NULL, 0}
446*4882a593Smuzhiyun };
447*4882a593Smuzhiyun
448*4882a593Smuzhiyun while ((k = getopt_long(argc, argv, optstring, loptions, NULL)) != -1) {
449*4882a593Smuzhiyun switch (k) {
450*4882a593Smuzhiyun case 1:
451*4882a593Smuzhiyun no_cn_flag = true;
452*4882a593Smuzhiyun break;
453*4882a593Smuzhiyun case 2:
454*4882a593Smuzhiyun prog = "hbm_edt_kern.o";
455*4882a593Smuzhiyun edt_flag = true;
456*4882a593Smuzhiyun break;
457*4882a593Smuzhiyun case'o':
458*4882a593Smuzhiyun break;
459*4882a593Smuzhiyun case 'd':
460*4882a593Smuzhiyun debugFlag = true;
461*4882a593Smuzhiyun break;
462*4882a593Smuzhiyun case 'l':
463*4882a593Smuzhiyun loopback_flag = true;
464*4882a593Smuzhiyun break;
465*4882a593Smuzhiyun case 'n':
466*4882a593Smuzhiyun cg_id = atoi(optarg);
467*4882a593Smuzhiyun break;
468*4882a593Smuzhiyun case 'r':
469*4882a593Smuzhiyun minRate = atoi(optarg) * 1.024;
470*4882a593Smuzhiyun rate = minRate;
471*4882a593Smuzhiyun break;
472*4882a593Smuzhiyun case 's':
473*4882a593Smuzhiyun stats_flag = true;
474*4882a593Smuzhiyun break;
475*4882a593Smuzhiyun case 't':
476*4882a593Smuzhiyun dur = atoi(optarg);
477*4882a593Smuzhiyun break;
478*4882a593Smuzhiyun case 'w':
479*4882a593Smuzhiyun work_conserving_flag = true;
480*4882a593Smuzhiyun break;
481*4882a593Smuzhiyun case '?':
482*4882a593Smuzhiyun if (optopt == 'n' || optopt == 'r' || optopt == 't')
483*4882a593Smuzhiyun fprintf(stderr,
484*4882a593Smuzhiyun "Option -%c requires an argument.\n\n",
485*4882a593Smuzhiyun optopt);
486*4882a593Smuzhiyun case 'h':
487*4882a593Smuzhiyun __fallthrough;
488*4882a593Smuzhiyun default:
489*4882a593Smuzhiyun Usage();
490*4882a593Smuzhiyun return 0;
491*4882a593Smuzhiyun }
492*4882a593Smuzhiyun }
493*4882a593Smuzhiyun
494*4882a593Smuzhiyun if (optind < argc)
495*4882a593Smuzhiyun prog = argv[optind];
496*4882a593Smuzhiyun printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");
497*4882a593Smuzhiyun
498*4882a593Smuzhiyun return run_bpf_prog(prog, cg_id);
499*4882a593Smuzhiyun }
500