xref: /OK3568_Linux_fs/kernel/samples/bpf/hbm.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /* Copyright (c) 2019 Facebook
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * This program is free software; you can redistribute it and/or
5*4882a593Smuzhiyun  * modify it under the terms of version 2 of the GNU General Public
6*4882a593Smuzhiyun  * License as published by the Free Software Foundation.
7*4882a593Smuzhiyun  *
8*4882a593Smuzhiyun  * Example program for Host Bandwidth Management
9*4882a593Smuzhiyun  *
10*4882a593Smuzhiyun  * This program loads a cgroup skb BPF program to enforce cgroup output
11*4882a593Smuzhiyun  * (egress) or input (ingress) bandwidth limits.
12*4882a593Smuzhiyun  *
13*4882a593Smuzhiyun  * USAGE: hbm [-d] [-l] [-n <id>] [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog]
14*4882a593Smuzhiyun  *   Where:
15*4882a593Smuzhiyun  *    -d	Print BPF trace debug buffer
16*4882a593Smuzhiyun  *    -l	Also limit flows doing loopback
17*4882a593Smuzhiyun  *    -n <#>	To create cgroup \"/hbm#\" and attach prog
18*4882a593Smuzhiyun  *		Default is /hbm1
19*4882a593Smuzhiyun  *    --no_cn   Do not return cn notifications
20*4882a593Smuzhiyun  *    -r <rate>	Rate limit in Mbps
21*4882a593Smuzhiyun  *    -s	Get HBM stats (marked, dropped, etc.)
22*4882a593Smuzhiyun  *    -t <time>	Exit after specified seconds (default is 1)
23*4882a593Smuzhiyun  *    -w	Work conserving flag. cgroup can increase its bandwidth
24*4882a593Smuzhiyun  *		beyond the rate limit specified while there is available
25*4882a593Smuzhiyun  *		bandwidth. Current implementation assumes there is only
26*4882a593Smuzhiyun  *		one NIC (eth0), but can be extended to support multiple NICs.
27*4882a593Smuzhiyun  *		Currently only supported for egress.
28*4882a593Smuzhiyun  *    -h	Print this info
29*4882a593Smuzhiyun  *    prog	BPF program file name. Name defaults to hbm_out_kern.o
30*4882a593Smuzhiyun  */
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun #define _GNU_SOURCE
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun #include <stdio.h>
35*4882a593Smuzhiyun #include <stdlib.h>
36*4882a593Smuzhiyun #include <assert.h>
37*4882a593Smuzhiyun #include <sys/resource.h>
38*4882a593Smuzhiyun #include <sys/time.h>
39*4882a593Smuzhiyun #include <unistd.h>
40*4882a593Smuzhiyun #include <errno.h>
41*4882a593Smuzhiyun #include <fcntl.h>
42*4882a593Smuzhiyun #include <linux/unistd.h>
43*4882a593Smuzhiyun #include <linux/compiler.h>
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun #include <linux/bpf.h>
46*4882a593Smuzhiyun #include <bpf/bpf.h>
47*4882a593Smuzhiyun #include <getopt.h>
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun #include "bpf_load.h"
50*4882a593Smuzhiyun #include "bpf_rlimit.h"
51*4882a593Smuzhiyun #include "cgroup_helpers.h"
52*4882a593Smuzhiyun #include "hbm.h"
53*4882a593Smuzhiyun #include "bpf_util.h"
54*4882a593Smuzhiyun #include <bpf/bpf.h>
55*4882a593Smuzhiyun #include <bpf/libbpf.h>
56*4882a593Smuzhiyun 
/* Location of the kernel tracing files under debugfs. */
#define DEBUGFS "/sys/kernel/debug/tracing/"

/* Helpers defined further down in this file. */
static void do_error(char *msg, bool errno_flag);
static void read_trace_pipe2(void);
static void Usage(void);

/* Handles filled in when the BPF object is loaded. */
struct bpf_object *obj;
int bpfprog_fd;
int cgroup_storage_fd;

/* Rate and timing settings (overridable with -r and -t). */
int minRate = 1000;		/* cgroup rate limit in Mbps */
int rate = 1000;		/* can grow if rate conserving is enabled */
int dur = 1;			/* run time in seconds */

/* Command line switches. */
bool outFlag = true;		/* true: attach on egress, false: ingress */
bool debugFlag;			/* -d: dump the BPF trace pipe */
bool loopback_flag;		/* -l: also limit loopback flows */
bool stats_flag;		/* -s: collect and write HBM stats */
bool work_conserving_flag;	/* -w: let the rate grow past the limit */
bool no_cn_flag;		/* --no_cn: no cn notifications */
bool edt_flag;			/* --edt: use the EDT based program */
77*4882a593Smuzhiyun 
read_trace_pipe2(void)78*4882a593Smuzhiyun static void read_trace_pipe2(void)
79*4882a593Smuzhiyun {
80*4882a593Smuzhiyun 	int trace_fd;
81*4882a593Smuzhiyun 	FILE *outf;
82*4882a593Smuzhiyun 	char *outFname = "hbm_out.log";
83*4882a593Smuzhiyun 
84*4882a593Smuzhiyun 	trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
85*4882a593Smuzhiyun 	if (trace_fd < 0) {
86*4882a593Smuzhiyun 		printf("Error opening trace_pipe\n");
87*4882a593Smuzhiyun 		return;
88*4882a593Smuzhiyun 	}
89*4882a593Smuzhiyun 
90*4882a593Smuzhiyun //	Future support of ingress
91*4882a593Smuzhiyun //	if (!outFlag)
92*4882a593Smuzhiyun //		outFname = "hbm_in.log";
93*4882a593Smuzhiyun 	outf = fopen(outFname, "w");
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	if (outf == NULL)
96*4882a593Smuzhiyun 		printf("Error creating %s\n", outFname);
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun 	while (1) {
99*4882a593Smuzhiyun 		static char buf[4097];
100*4882a593Smuzhiyun 		ssize_t sz;
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 		sz = read(trace_fd, buf, sizeof(buf) - 1);
103*4882a593Smuzhiyun 		if (sz > 0) {
104*4882a593Smuzhiyun 			buf[sz] = 0;
105*4882a593Smuzhiyun 			puts(buf);
106*4882a593Smuzhiyun 			if (outf != NULL) {
107*4882a593Smuzhiyun 				fprintf(outf, "%s\n", buf);
108*4882a593Smuzhiyun 				fflush(outf);
109*4882a593Smuzhiyun 			}
110*4882a593Smuzhiyun 		}
111*4882a593Smuzhiyun 	}
112*4882a593Smuzhiyun }
113*4882a593Smuzhiyun 
/*
 * Report a fatal error on stdout and terminate the process.
 *
 * @msg:        text printed after the "ERROR: " prefix
 * @errno_flag: when true the current errno value is appended to the message
 *
 * Never returns: the process always exits with status 1.
 */
static void do_error(char *msg, bool errno_flag)
{
	if (!errno_flag) {
		printf("ERROR: %s\n", msg);
		exit(1);
	}

	printf("ERROR: %s, errno: %d\n", msg, errno);
	exit(1);
}
122*4882a593Smuzhiyun 
prog_load(char * prog)123*4882a593Smuzhiyun static int prog_load(char *prog)
124*4882a593Smuzhiyun {
125*4882a593Smuzhiyun 	struct bpf_prog_load_attr prog_load_attr = {
126*4882a593Smuzhiyun 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
127*4882a593Smuzhiyun 		.file = prog,
128*4882a593Smuzhiyun 		.expected_attach_type = BPF_CGROUP_INET_EGRESS,
129*4882a593Smuzhiyun 	};
130*4882a593Smuzhiyun 	int map_fd;
131*4882a593Smuzhiyun 	struct bpf_map *map;
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 	int ret = 0;
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun 	if (access(prog, O_RDONLY) < 0) {
136*4882a593Smuzhiyun 		printf("Error accessing file %s: %s\n", prog, strerror(errno));
137*4882a593Smuzhiyun 		return 1;
138*4882a593Smuzhiyun 	}
139*4882a593Smuzhiyun 	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd))
140*4882a593Smuzhiyun 		ret = 1;
141*4882a593Smuzhiyun 	if (!ret) {
142*4882a593Smuzhiyun 		map = bpf_object__find_map_by_name(obj, "queue_stats");
143*4882a593Smuzhiyun 		map_fd = bpf_map__fd(map);
144*4882a593Smuzhiyun 		if (map_fd < 0) {
145*4882a593Smuzhiyun 			printf("Map not found: %s\n", strerror(map_fd));
146*4882a593Smuzhiyun 			ret = 1;
147*4882a593Smuzhiyun 		}
148*4882a593Smuzhiyun 	}
149*4882a593Smuzhiyun 
150*4882a593Smuzhiyun 	if (ret) {
151*4882a593Smuzhiyun 		printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog);
152*4882a593Smuzhiyun 		printf("  Output from verifier:\n%s\n------\n", bpf_log_buf);
153*4882a593Smuzhiyun 		ret = -1;
154*4882a593Smuzhiyun 	} else {
155*4882a593Smuzhiyun 		ret = map_fd;
156*4882a593Smuzhiyun 	}
157*4882a593Smuzhiyun 
158*4882a593Smuzhiyun 	return ret;
159*4882a593Smuzhiyun }
160*4882a593Smuzhiyun 
run_bpf_prog(char * prog,int cg_id)161*4882a593Smuzhiyun static int run_bpf_prog(char *prog, int cg_id)
162*4882a593Smuzhiyun {
163*4882a593Smuzhiyun 	int map_fd;
164*4882a593Smuzhiyun 	int rc = 0;
165*4882a593Smuzhiyun 	int key = 0;
166*4882a593Smuzhiyun 	int cg1 = 0;
167*4882a593Smuzhiyun 	int type = BPF_CGROUP_INET_EGRESS;
168*4882a593Smuzhiyun 	char cg_dir[100];
169*4882a593Smuzhiyun 	struct hbm_queue_stats qstats = {0};
170*4882a593Smuzhiyun 
171*4882a593Smuzhiyun 	sprintf(cg_dir, "/hbm%d", cg_id);
172*4882a593Smuzhiyun 	map_fd = prog_load(prog);
173*4882a593Smuzhiyun 	if (map_fd  == -1)
174*4882a593Smuzhiyun 		return 1;
175*4882a593Smuzhiyun 
176*4882a593Smuzhiyun 	if (setup_cgroup_environment()) {
177*4882a593Smuzhiyun 		printf("ERROR: setting cgroup environment\n");
178*4882a593Smuzhiyun 		goto err;
179*4882a593Smuzhiyun 	}
180*4882a593Smuzhiyun 	cg1 = create_and_get_cgroup(cg_dir);
181*4882a593Smuzhiyun 	if (!cg1) {
182*4882a593Smuzhiyun 		printf("ERROR: create_and_get_cgroup\n");
183*4882a593Smuzhiyun 		goto err;
184*4882a593Smuzhiyun 	}
185*4882a593Smuzhiyun 	if (join_cgroup(cg_dir)) {
186*4882a593Smuzhiyun 		printf("ERROR: join_cgroup\n");
187*4882a593Smuzhiyun 		goto err;
188*4882a593Smuzhiyun 	}
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	qstats.rate = rate;
191*4882a593Smuzhiyun 	qstats.stats = stats_flag ? 1 : 0;
192*4882a593Smuzhiyun 	qstats.loopback = loopback_flag ? 1 : 0;
193*4882a593Smuzhiyun 	qstats.no_cn = no_cn_flag ? 1 : 0;
194*4882a593Smuzhiyun 	if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) {
195*4882a593Smuzhiyun 		printf("ERROR: Could not update map element\n");
196*4882a593Smuzhiyun 		goto err;
197*4882a593Smuzhiyun 	}
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	if (!outFlag)
200*4882a593Smuzhiyun 		type = BPF_CGROUP_INET_INGRESS;
201*4882a593Smuzhiyun 	if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) {
202*4882a593Smuzhiyun 		printf("ERROR: bpf_prog_attach fails!\n");
203*4882a593Smuzhiyun 		log_err("Attaching prog");
204*4882a593Smuzhiyun 		goto err;
205*4882a593Smuzhiyun 	}
206*4882a593Smuzhiyun 
207*4882a593Smuzhiyun 	if (work_conserving_flag) {
208*4882a593Smuzhiyun 		struct timeval t0, t_last, t_new;
209*4882a593Smuzhiyun 		FILE *fin;
210*4882a593Smuzhiyun 		unsigned long long last_eth_tx_bytes, new_eth_tx_bytes;
211*4882a593Smuzhiyun 		signed long long last_cg_tx_bytes, new_cg_tx_bytes;
212*4882a593Smuzhiyun 		signed long long delta_time, delta_bytes, delta_rate;
213*4882a593Smuzhiyun 		int delta_ms;
214*4882a593Smuzhiyun #define DELTA_RATE_CHECK 10000		/* in us */
215*4882a593Smuzhiyun #define RATE_THRESHOLD 9500000000	/* 9.5 Gbps */
216*4882a593Smuzhiyun 
217*4882a593Smuzhiyun 		bpf_map_lookup_elem(map_fd, &key, &qstats);
218*4882a593Smuzhiyun 		if (gettimeofday(&t0, NULL) < 0)
219*4882a593Smuzhiyun 			do_error("gettimeofday failed", true);
220*4882a593Smuzhiyun 		t_last = t0;
221*4882a593Smuzhiyun 		fin = fopen("/sys/class/net/eth0/statistics/tx_bytes", "r");
222*4882a593Smuzhiyun 		if (fscanf(fin, "%llu", &last_eth_tx_bytes) != 1)
223*4882a593Smuzhiyun 			do_error("fscanf fails", false);
224*4882a593Smuzhiyun 		fclose(fin);
225*4882a593Smuzhiyun 		last_cg_tx_bytes = qstats.bytes_total;
226*4882a593Smuzhiyun 		while (true) {
227*4882a593Smuzhiyun 			usleep(DELTA_RATE_CHECK);
228*4882a593Smuzhiyun 			if (gettimeofday(&t_new, NULL) < 0)
229*4882a593Smuzhiyun 				do_error("gettimeofday failed", true);
230*4882a593Smuzhiyun 			delta_ms = (t_new.tv_sec - t0.tv_sec) * 1000 +
231*4882a593Smuzhiyun 				(t_new.tv_usec - t0.tv_usec)/1000;
232*4882a593Smuzhiyun 			if (delta_ms > dur * 1000)
233*4882a593Smuzhiyun 				break;
234*4882a593Smuzhiyun 			delta_time = (t_new.tv_sec - t_last.tv_sec) * 1000000 +
235*4882a593Smuzhiyun 				(t_new.tv_usec - t_last.tv_usec);
236*4882a593Smuzhiyun 			if (delta_time == 0)
237*4882a593Smuzhiyun 				continue;
238*4882a593Smuzhiyun 			t_last = t_new;
239*4882a593Smuzhiyun 			fin = fopen("/sys/class/net/eth0/statistics/tx_bytes",
240*4882a593Smuzhiyun 				    "r");
241*4882a593Smuzhiyun 			if (fscanf(fin, "%llu", &new_eth_tx_bytes) != 1)
242*4882a593Smuzhiyun 				do_error("fscanf fails", false);
243*4882a593Smuzhiyun 			fclose(fin);
244*4882a593Smuzhiyun 			printf("  new_eth_tx_bytes:%llu\n",
245*4882a593Smuzhiyun 			       new_eth_tx_bytes);
246*4882a593Smuzhiyun 			bpf_map_lookup_elem(map_fd, &key, &qstats);
247*4882a593Smuzhiyun 			new_cg_tx_bytes = qstats.bytes_total;
248*4882a593Smuzhiyun 			delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
249*4882a593Smuzhiyun 			last_eth_tx_bytes = new_eth_tx_bytes;
250*4882a593Smuzhiyun 			delta_rate = (delta_bytes * 8000000) / delta_time;
251*4882a593Smuzhiyun 			printf("%5d - eth_rate:%.1fGbps cg_rate:%.3fGbps",
252*4882a593Smuzhiyun 			       delta_ms, delta_rate/1000000000.0,
253*4882a593Smuzhiyun 			       rate/1000.0);
254*4882a593Smuzhiyun 			if (delta_rate < RATE_THRESHOLD) {
255*4882a593Smuzhiyun 				/* can increase cgroup rate limit, but first
256*4882a593Smuzhiyun 				 * check if we are using the current limit.
257*4882a593Smuzhiyun 				 * Currently increasing by 6.25%, unknown
258*4882a593Smuzhiyun 				 * if that is the optimal rate.
259*4882a593Smuzhiyun 				 */
260*4882a593Smuzhiyun 				int rate_diff100;
261*4882a593Smuzhiyun 
262*4882a593Smuzhiyun 				delta_bytes = new_cg_tx_bytes -
263*4882a593Smuzhiyun 					last_cg_tx_bytes;
264*4882a593Smuzhiyun 				last_cg_tx_bytes = new_cg_tx_bytes;
265*4882a593Smuzhiyun 				delta_rate = (delta_bytes * 8000000) /
266*4882a593Smuzhiyun 					delta_time;
267*4882a593Smuzhiyun 				printf(" rate:%.3fGbps",
268*4882a593Smuzhiyun 				       delta_rate/1000000000.0);
269*4882a593Smuzhiyun 				rate_diff100 = (((long long)rate)*1000000 -
270*4882a593Smuzhiyun 						     delta_rate) * 100 /
271*4882a593Smuzhiyun 					(((long long) rate) * 1000000);
272*4882a593Smuzhiyun 				printf("  rdiff:%d", rate_diff100);
273*4882a593Smuzhiyun 				if (rate_diff100  <= 3) {
274*4882a593Smuzhiyun 					rate += (rate >> 4);
275*4882a593Smuzhiyun 					if (rate > RATE_THRESHOLD / 1000000)
276*4882a593Smuzhiyun 						rate = RATE_THRESHOLD / 1000000;
277*4882a593Smuzhiyun 					qstats.rate = rate;
278*4882a593Smuzhiyun 					printf(" INC\n");
279*4882a593Smuzhiyun 				} else {
280*4882a593Smuzhiyun 					printf("\n");
281*4882a593Smuzhiyun 				}
282*4882a593Smuzhiyun 			} else {
283*4882a593Smuzhiyun 				/* Need to decrease cgroup rate limit.
284*4882a593Smuzhiyun 				 * Currently decreasing by 12.5%, unknown
285*4882a593Smuzhiyun 				 * if that is optimal
286*4882a593Smuzhiyun 				 */
287*4882a593Smuzhiyun 				printf(" DEC\n");
288*4882a593Smuzhiyun 				rate -= (rate >> 3);
289*4882a593Smuzhiyun 				if (rate < minRate)
290*4882a593Smuzhiyun 					rate = minRate;
291*4882a593Smuzhiyun 				qstats.rate = rate;
292*4882a593Smuzhiyun 			}
293*4882a593Smuzhiyun 			if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY))
294*4882a593Smuzhiyun 				do_error("update map element fails", false);
295*4882a593Smuzhiyun 		}
296*4882a593Smuzhiyun 	} else {
297*4882a593Smuzhiyun 		sleep(dur);
298*4882a593Smuzhiyun 	}
299*4882a593Smuzhiyun 	// Get stats!
300*4882a593Smuzhiyun 	if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) {
301*4882a593Smuzhiyun 		char fname[100];
302*4882a593Smuzhiyun 		FILE *fout;
303*4882a593Smuzhiyun 
304*4882a593Smuzhiyun 		if (!outFlag)
305*4882a593Smuzhiyun 			sprintf(fname, "hbm.%d.in", cg_id);
306*4882a593Smuzhiyun 		else
307*4882a593Smuzhiyun 			sprintf(fname, "hbm.%d.out", cg_id);
308*4882a593Smuzhiyun 		fout = fopen(fname, "w");
309*4882a593Smuzhiyun 		fprintf(fout, "id:%d\n", cg_id);
310*4882a593Smuzhiyun 		fprintf(fout, "ERROR: Could not lookup queue_stats\n");
311*4882a593Smuzhiyun 	} else if (stats_flag && qstats.lastPacketTime >
312*4882a593Smuzhiyun 		   qstats.firstPacketTime) {
313*4882a593Smuzhiyun 		long long delta_us = (qstats.lastPacketTime -
314*4882a593Smuzhiyun 				      qstats.firstPacketTime)/1000;
315*4882a593Smuzhiyun 		unsigned int rate_mbps = ((qstats.bytes_total -
316*4882a593Smuzhiyun 					   qstats.bytes_dropped) * 8 /
317*4882a593Smuzhiyun 					  delta_us);
318*4882a593Smuzhiyun 		double percent_pkts, percent_bytes;
319*4882a593Smuzhiyun 		char fname[100];
320*4882a593Smuzhiyun 		FILE *fout;
321*4882a593Smuzhiyun 		int k;
322*4882a593Smuzhiyun 		static const char *returnValNames[] = {
323*4882a593Smuzhiyun 			"DROP_PKT",
324*4882a593Smuzhiyun 			"ALLOW_PKT",
325*4882a593Smuzhiyun 			"DROP_PKT_CWR",
326*4882a593Smuzhiyun 			"ALLOW_PKT_CWR"
327*4882a593Smuzhiyun 		};
328*4882a593Smuzhiyun #define RET_VAL_COUNT 4
329*4882a593Smuzhiyun 
330*4882a593Smuzhiyun // Future support of ingress
331*4882a593Smuzhiyun //		if (!outFlag)
332*4882a593Smuzhiyun //			sprintf(fname, "hbm.%d.in", cg_id);
333*4882a593Smuzhiyun //		else
334*4882a593Smuzhiyun 		sprintf(fname, "hbm.%d.out", cg_id);
335*4882a593Smuzhiyun 		fout = fopen(fname, "w");
336*4882a593Smuzhiyun 		fprintf(fout, "id:%d\n", cg_id);
337*4882a593Smuzhiyun 		fprintf(fout, "rate_mbps:%d\n", rate_mbps);
338*4882a593Smuzhiyun 		fprintf(fout, "duration:%.1f secs\n",
339*4882a593Smuzhiyun 			(qstats.lastPacketTime - qstats.firstPacketTime) /
340*4882a593Smuzhiyun 			1000000000.0);
341*4882a593Smuzhiyun 		fprintf(fout, "packets:%d\n", (int)qstats.pkts_total);
342*4882a593Smuzhiyun 		fprintf(fout, "bytes_MB:%d\n", (int)(qstats.bytes_total /
343*4882a593Smuzhiyun 						     1000000));
344*4882a593Smuzhiyun 		fprintf(fout, "pkts_dropped:%d\n", (int)qstats.pkts_dropped);
345*4882a593Smuzhiyun 		fprintf(fout, "bytes_dropped_MB:%d\n",
346*4882a593Smuzhiyun 			(int)(qstats.bytes_dropped /
347*4882a593Smuzhiyun 						       1000000));
348*4882a593Smuzhiyun 		// Marked Pkts and Bytes
349*4882a593Smuzhiyun 		percent_pkts = (qstats.pkts_marked * 100.0) /
350*4882a593Smuzhiyun 			(qstats.pkts_total + 1);
351*4882a593Smuzhiyun 		percent_bytes = (qstats.bytes_marked * 100.0) /
352*4882a593Smuzhiyun 			(qstats.bytes_total + 1);
353*4882a593Smuzhiyun 		fprintf(fout, "pkts_marked_percent:%6.2f\n", percent_pkts);
354*4882a593Smuzhiyun 		fprintf(fout, "bytes_marked_percent:%6.2f\n", percent_bytes);
355*4882a593Smuzhiyun 
356*4882a593Smuzhiyun 		// Dropped Pkts and Bytes
357*4882a593Smuzhiyun 		percent_pkts = (qstats.pkts_dropped * 100.0) /
358*4882a593Smuzhiyun 			(qstats.pkts_total + 1);
359*4882a593Smuzhiyun 		percent_bytes = (qstats.bytes_dropped * 100.0) /
360*4882a593Smuzhiyun 			(qstats.bytes_total + 1);
361*4882a593Smuzhiyun 		fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts);
362*4882a593Smuzhiyun 		fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes);
363*4882a593Smuzhiyun 
364*4882a593Smuzhiyun 		// ECN CE markings
365*4882a593Smuzhiyun 		percent_pkts = (qstats.pkts_ecn_ce * 100.0) /
366*4882a593Smuzhiyun 			(qstats.pkts_total + 1);
367*4882a593Smuzhiyun 		fprintf(fout, "pkts_ecn_ce:%6.2f (%d)\n", percent_pkts,
368*4882a593Smuzhiyun 			(int)qstats.pkts_ecn_ce);
369*4882a593Smuzhiyun 
370*4882a593Smuzhiyun 		// Average cwnd
371*4882a593Smuzhiyun 		fprintf(fout, "avg cwnd:%d\n",
372*4882a593Smuzhiyun 			(int)(qstats.sum_cwnd / (qstats.sum_cwnd_cnt + 1)));
373*4882a593Smuzhiyun 		// Average rtt
374*4882a593Smuzhiyun 		fprintf(fout, "avg rtt:%d\n",
375*4882a593Smuzhiyun 			(int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
376*4882a593Smuzhiyun 		// Average credit
377*4882a593Smuzhiyun 		if (edt_flag)
378*4882a593Smuzhiyun 			fprintf(fout, "avg credit_ms:%.03f\n",
379*4882a593Smuzhiyun 				(qstats.sum_credit /
380*4882a593Smuzhiyun 				 (qstats.pkts_total + 1.0)) / 1000000.0);
381*4882a593Smuzhiyun 		else
382*4882a593Smuzhiyun 			fprintf(fout, "avg credit:%d\n",
383*4882a593Smuzhiyun 				(int)(qstats.sum_credit /
384*4882a593Smuzhiyun 				      (1500 * ((int)qstats.pkts_total ) + 1)));
385*4882a593Smuzhiyun 
386*4882a593Smuzhiyun 		// Return values stats
387*4882a593Smuzhiyun 		for (k = 0; k < RET_VAL_COUNT; k++) {
388*4882a593Smuzhiyun 			percent_pkts = (qstats.returnValCount[k] * 100.0) /
389*4882a593Smuzhiyun 				(qstats.pkts_total + 1);
390*4882a593Smuzhiyun 			fprintf(fout, "%s:%6.2f (%d)\n", returnValNames[k],
391*4882a593Smuzhiyun 				percent_pkts, (int)qstats.returnValCount[k]);
392*4882a593Smuzhiyun 		}
393*4882a593Smuzhiyun 		fclose(fout);
394*4882a593Smuzhiyun 	}
395*4882a593Smuzhiyun 
396*4882a593Smuzhiyun 	if (debugFlag)
397*4882a593Smuzhiyun 		read_trace_pipe2();
398*4882a593Smuzhiyun 	return rc;
399*4882a593Smuzhiyun err:
400*4882a593Smuzhiyun 	rc = 1;
401*4882a593Smuzhiyun 
402*4882a593Smuzhiyun 	if (cg1)
403*4882a593Smuzhiyun 		close(cg1);
404*4882a593Smuzhiyun 	cleanup_cgroup_environment();
405*4882a593Smuzhiyun 
406*4882a593Smuzhiyun 	return rc;
407*4882a593Smuzhiyun }
408*4882a593Smuzhiyun 
/*
 * Print the command line help to stdout.
 *
 * The text mirrors the options handled in main(): the default for -t is the
 * initial value of dur (1 second) and --edt switches the default program to
 * hbm_edt_kern.o.
 */
static void Usage(void)
{
	printf("This program loads a cgroup skb BPF program to enforce\n"
	       "cgroup output (egress) bandwidth limits.\n\n"
	       "USAGE: hbm [-o] [-d] [--edt] [-l] [-n <id>] [--no_cn]\n"
	       "           [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog]\n"
	       "  Where:\n"
	       "    -o         indicates egress direction (default)\n"
	       "    -d         print BPF trace debug buffer\n"
	       "    --edt      use fq's Earliest Departure Time\n"
	       "    -l         also limit flows using loopback\n"
	       "    -n <#>     to create cgroup \"/hbm#\" and attach prog\n"
	       "               Default is /hbm1\n"
	       "    --no_cn    disable CN notifications\n"
	       "    -r <rate>  Rate in Mbps\n"
	       "    -s         Update HBM stats\n"
	       "    -t <time>  Exit after specified seconds (default is 1)\n"
	       "    -w         Work conserving flag. cgroup can increase\n"
	       "               bandwidth beyond the rate limit specified\n"
	       "               while there is available bandwidth. Current\n"
	       "               implementation assumes there is only eth0\n"
	       "               but can be extended to support multiple NICs\n"
	       "    -h         print this info\n"
	       "    prog       BPF program file name. Name defaults to\n"
	       "                 hbm_out_kern.o (hbm_edt_kern.o with --edt)\n");
}
435*4882a593Smuzhiyun 
main(int argc,char ** argv)436*4882a593Smuzhiyun int main(int argc, char **argv)
437*4882a593Smuzhiyun {
438*4882a593Smuzhiyun 	char *prog = "hbm_out_kern.o";
439*4882a593Smuzhiyun 	int  k;
440*4882a593Smuzhiyun 	int cg_id = 1;
441*4882a593Smuzhiyun 	char *optstring = "iodln:r:st:wh";
442*4882a593Smuzhiyun 	struct option loptions[] = {
443*4882a593Smuzhiyun 		{"no_cn", 0, NULL, 1},
444*4882a593Smuzhiyun 		{"edt", 0, NULL, 2},
445*4882a593Smuzhiyun 		{NULL, 0, NULL, 0}
446*4882a593Smuzhiyun 	};
447*4882a593Smuzhiyun 
448*4882a593Smuzhiyun 	while ((k = getopt_long(argc, argv, optstring, loptions, NULL)) != -1) {
449*4882a593Smuzhiyun 		switch (k) {
450*4882a593Smuzhiyun 		case 1:
451*4882a593Smuzhiyun 			no_cn_flag = true;
452*4882a593Smuzhiyun 			break;
453*4882a593Smuzhiyun 		case 2:
454*4882a593Smuzhiyun 			prog = "hbm_edt_kern.o";
455*4882a593Smuzhiyun 			edt_flag = true;
456*4882a593Smuzhiyun 			break;
457*4882a593Smuzhiyun 		case'o':
458*4882a593Smuzhiyun 			break;
459*4882a593Smuzhiyun 		case 'd':
460*4882a593Smuzhiyun 			debugFlag = true;
461*4882a593Smuzhiyun 			break;
462*4882a593Smuzhiyun 		case 'l':
463*4882a593Smuzhiyun 			loopback_flag = true;
464*4882a593Smuzhiyun 			break;
465*4882a593Smuzhiyun 		case 'n':
466*4882a593Smuzhiyun 			cg_id = atoi(optarg);
467*4882a593Smuzhiyun 			break;
468*4882a593Smuzhiyun 		case 'r':
469*4882a593Smuzhiyun 			minRate = atoi(optarg) * 1.024;
470*4882a593Smuzhiyun 			rate = minRate;
471*4882a593Smuzhiyun 			break;
472*4882a593Smuzhiyun 		case 's':
473*4882a593Smuzhiyun 			stats_flag = true;
474*4882a593Smuzhiyun 			break;
475*4882a593Smuzhiyun 		case 't':
476*4882a593Smuzhiyun 			dur = atoi(optarg);
477*4882a593Smuzhiyun 			break;
478*4882a593Smuzhiyun 		case 'w':
479*4882a593Smuzhiyun 			work_conserving_flag = true;
480*4882a593Smuzhiyun 			break;
481*4882a593Smuzhiyun 		case '?':
482*4882a593Smuzhiyun 			if (optopt == 'n' || optopt == 'r' || optopt == 't')
483*4882a593Smuzhiyun 				fprintf(stderr,
484*4882a593Smuzhiyun 					"Option -%c requires an argument.\n\n",
485*4882a593Smuzhiyun 					optopt);
486*4882a593Smuzhiyun 		case 'h':
487*4882a593Smuzhiyun 			__fallthrough;
488*4882a593Smuzhiyun 		default:
489*4882a593Smuzhiyun 			Usage();
490*4882a593Smuzhiyun 			return 0;
491*4882a593Smuzhiyun 		}
492*4882a593Smuzhiyun 	}
493*4882a593Smuzhiyun 
494*4882a593Smuzhiyun 	if (optind < argc)
495*4882a593Smuzhiyun 		prog = argv[optind];
496*4882a593Smuzhiyun 	printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");
497*4882a593Smuzhiyun 
498*4882a593Smuzhiyun 	return run_bpf_prog(prog, cg_id);
499*4882a593Smuzhiyun }
500