xref: /OK3568_Linux_fs/kernel/tools/testing/selftests/net/reuseport_bpf.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * Test functionality of BPF filters for SO_REUSEPORT.  The tests below will use
 * a BPF program (both classic and extended) to read the first word from an
 * incoming packet (expected to be in network byte-order), calculate a modulus
 * of that number, and then dispatch the packet to the Nth socket using the
 * result.  These tests are run for each supported address family and protocol.
 * Additionally, a few edge cases in the implementation are tested.
 */
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #include <errno.h>
11*4882a593Smuzhiyun #include <error.h>
12*4882a593Smuzhiyun #include <fcntl.h>
13*4882a593Smuzhiyun #include <linux/bpf.h>
14*4882a593Smuzhiyun #include <linux/filter.h>
15*4882a593Smuzhiyun #include <linux/unistd.h>
16*4882a593Smuzhiyun #include <netinet/in.h>
17*4882a593Smuzhiyun #include <netinet/tcp.h>
18*4882a593Smuzhiyun #include <stdio.h>
19*4882a593Smuzhiyun #include <stdlib.h>
20*4882a593Smuzhiyun #include <string.h>
21*4882a593Smuzhiyun #include <sys/epoll.h>
22*4882a593Smuzhiyun #include <sys/types.h>
23*4882a593Smuzhiyun #include <sys/socket.h>
24*4882a593Smuzhiyun #include <sys/resource.h>
25*4882a593Smuzhiyun #include <unistd.h>
26*4882a593Smuzhiyun 
/* Element count of a true array (not a pointer); kept if already defined. */
#if !defined(ARRAY_SIZE)
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))
#endif
30*4882a593Smuzhiyun 
/*
 * Description of one test scenario: the address families and socket type to
 * use, how many receivers to create, and the ports involved.
 */
struct test_params {
	int recv_family;	/* address family of the receiving sockets */
	int send_family;	/* address family of the sending socket */
	int protocol;		/* socket type handed to socket() */
	size_t recv_socks;	/* number of receiving sockets in the group */
	uint16_t recv_port;	/* port the receivers bind to */
	uint16_t send_port_min;	/* source port used for the first packet */
};
39*4882a593Smuzhiyun 
/*
 * Address length handed to bind() and sendto() throughout this file: the
 * size of a full sockaddr_storage, regardless of address family.
 */
static size_t sockaddr_size(void)
{
	const size_t len = sizeof(struct sockaddr_storage);

	return len;
}
44*4882a593Smuzhiyun 
/*
 * Allocate a zero-filled sockaddr_storage and fill it in with the wildcard
 * ("any") address of @family and @port (given in host byte order).
 *
 * Only AF_INET and AF_INET6 are supported; any other family, or an
 * allocation failure, terminates the program through error().
 *
 * The caller owns the returned buffer and must release it with free().
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() zeroes the storage; bail out instead of writing to NULL. */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
72*4882a593Smuzhiyun 
/*
 * Same as new_any_sockaddr(), except that the address part is replaced with
 * the loopback address of @family.  The caller must free() the result.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *sa = new_any_sockaddr(family, port);

	if (family == AF_INET) {
		struct sockaddr_in *v4 = (struct sockaddr_in *)sa;

		v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)sa;

		v6->sin6_addr = in6addr_loopback;
	} else {
		error(1, 0, "Unsupported family %d", family);
	}

	return sa;
}
93*4882a593Smuzhiyun 
attach_ebpf(int fd,uint16_t mod)94*4882a593Smuzhiyun static void attach_ebpf(int fd, uint16_t mod)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun 	static char bpf_log_buf[65536];
97*4882a593Smuzhiyun 	static const char bpf_license[] = "GPL";
98*4882a593Smuzhiyun 
99*4882a593Smuzhiyun 	int bpf_fd;
100*4882a593Smuzhiyun 	const struct bpf_insn prog[] = {
101*4882a593Smuzhiyun 		/* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
102*4882a593Smuzhiyun 		{ BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
103*4882a593Smuzhiyun 		/* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
104*4882a593Smuzhiyun 		{ BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
105*4882a593Smuzhiyun 		/* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
106*4882a593Smuzhiyun 		{ BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
107*4882a593Smuzhiyun 		/* BPF_EXIT_INSN() */
108*4882a593Smuzhiyun 		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
109*4882a593Smuzhiyun 	};
110*4882a593Smuzhiyun 	union bpf_attr attr;
111*4882a593Smuzhiyun 
112*4882a593Smuzhiyun 	memset(&attr, 0, sizeof(attr));
113*4882a593Smuzhiyun 	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
114*4882a593Smuzhiyun 	attr.insn_cnt = ARRAY_SIZE(prog);
115*4882a593Smuzhiyun 	attr.insns = (unsigned long) &prog;
116*4882a593Smuzhiyun 	attr.license = (unsigned long) &bpf_license;
117*4882a593Smuzhiyun 	attr.log_buf = (unsigned long) &bpf_log_buf;
118*4882a593Smuzhiyun 	attr.log_size = sizeof(bpf_log_buf);
119*4882a593Smuzhiyun 	attr.log_level = 1;
120*4882a593Smuzhiyun 	attr.kern_version = 0;
121*4882a593Smuzhiyun 
122*4882a593Smuzhiyun 	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
123*4882a593Smuzhiyun 	if (bpf_fd < 0)
124*4882a593Smuzhiyun 		error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
127*4882a593Smuzhiyun 			sizeof(bpf_fd)))
128*4882a593Smuzhiyun 		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 	close(bpf_fd);
131*4882a593Smuzhiyun }
132*4882a593Smuzhiyun 
attach_cbpf(int fd,uint16_t mod)133*4882a593Smuzhiyun static void attach_cbpf(int fd, uint16_t mod)
134*4882a593Smuzhiyun {
135*4882a593Smuzhiyun 	struct sock_filter code[] = {
136*4882a593Smuzhiyun 		/* A = (uint32_t)skb[0] */
137*4882a593Smuzhiyun 		{ BPF_LD  | BPF_W | BPF_ABS, 0, 0, 0 },
138*4882a593Smuzhiyun 		/* A = A % mod */
139*4882a593Smuzhiyun 		{ BPF_ALU | BPF_MOD, 0, 0, mod },
140*4882a593Smuzhiyun 		/* return A */
141*4882a593Smuzhiyun 		{ BPF_RET | BPF_A, 0, 0, 0 },
142*4882a593Smuzhiyun 	};
143*4882a593Smuzhiyun 	struct sock_fprog p = {
144*4882a593Smuzhiyun 		.len = ARRAY_SIZE(code),
145*4882a593Smuzhiyun 		.filter = code,
146*4882a593Smuzhiyun 	};
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
149*4882a593Smuzhiyun 		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun 
build_recv_group(const struct test_params p,int fd[],uint16_t mod,void (* attach_bpf)(int,uint16_t))152*4882a593Smuzhiyun static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
153*4882a593Smuzhiyun 			     void (*attach_bpf)(int, uint16_t))
154*4882a593Smuzhiyun {
155*4882a593Smuzhiyun 	struct sockaddr * const addr =
156*4882a593Smuzhiyun 		new_any_sockaddr(p.recv_family, p.recv_port);
157*4882a593Smuzhiyun 	int i, opt;
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun 	for (i = 0; i < p.recv_socks; ++i) {
160*4882a593Smuzhiyun 		fd[i] = socket(p.recv_family, p.protocol, 0);
161*4882a593Smuzhiyun 		if (fd[i] < 0)
162*4882a593Smuzhiyun 			error(1, errno, "failed to create recv %d", i);
163*4882a593Smuzhiyun 
164*4882a593Smuzhiyun 		opt = 1;
165*4882a593Smuzhiyun 		if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
166*4882a593Smuzhiyun 			       sizeof(opt)))
167*4882a593Smuzhiyun 			error(1, errno, "failed to set SO_REUSEPORT on %d", i);
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 		if (i == 0)
170*4882a593Smuzhiyun 			attach_bpf(fd[i], mod);
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun 		if (bind(fd[i], addr, sockaddr_size()))
173*4882a593Smuzhiyun 			error(1, errno, "failed to bind recv socket %d", i);
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 		if (p.protocol == SOCK_STREAM) {
176*4882a593Smuzhiyun 			opt = 4;
177*4882a593Smuzhiyun 			if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
178*4882a593Smuzhiyun 				       sizeof(opt)))
179*4882a593Smuzhiyun 				error(1, errno,
180*4882a593Smuzhiyun 				      "failed to set TCP_FASTOPEN on %d", i);
181*4882a593Smuzhiyun 			if (listen(fd[i], p.recv_socks * 10))
182*4882a593Smuzhiyun 				error(1, errno, "failed to listen on socket");
183*4882a593Smuzhiyun 		}
184*4882a593Smuzhiyun 	}
185*4882a593Smuzhiyun 	free(addr);
186*4882a593Smuzhiyun }
187*4882a593Smuzhiyun 
/*
 * Send @len bytes of @buf to the loopback address on p.recv_port, using a
 * fresh socket bound to source port @sport.  The data is sent with
 * MSG_FASTOPEN and the socket is closed afterwards.
 *
 * Any failure terminates the program via error().
 */
static void send_from(struct test_params p, uint16_t sport, char *buf,
		      size_t len)
{
	struct sockaddr * const src = new_any_sockaddr(p.send_family, sport);
	struct sockaddr * const dst =
		new_loopback_sockaddr(p.send_family, p.recv_port);
	const int on = 1;
	const int sock = socket(p.send_family, p.protocol, 0);

	if (sock < 0)
		error(1, errno, "failed to create send socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0)
		error(1, errno, "failed to set reuseaddr");

	if (bind(sock, src, sockaddr_size()) != 0)
		error(1, errno, "failed to bind send socket");

	if (sendto(sock, buf, len, MSG_FASTOPEN, dst, sockaddr_size()) < 0)
		error(1, errno, "failed to send message");

	close(sock);
	free(src);
	free(dst);
}
212*4882a593Smuzhiyun 
/*
 * Send 2 * p.recv_socks packets and check which receiver gets each one.
 *
 * Packet number N carries htonl(N) as payload and is sent from source port
 * p.send_port_min + N.  After each send, the function waits on an epoll set
 * holding all of @fd for one readable socket, reads the payload back
 * (through accept() first for SOCK_STREAM) and requires that the index of
 * the readable socket within @fd equals sport % @mod.
 *
 * NOTE(review): the expectation is computed from the source port, not the
 * payload, so it presumably relies on p.send_port_min being a multiple of
 * @mod - confirm against the callers.
 *
 * Any failure or mismatch terminates the program via error().
 */
static void test_recv_order(const struct test_params p, int fd[], int mod)
{
	char recv_buf[8], send_buf[8];
	struct msghdr msg;
	struct iovec recv_io = { recv_buf, 8 };
	struct epoll_event ev;
	int epfd, conn, i, sport, expected;
	uint32_t data, ndata;

	epfd = epoll_create(1);
	if (epfd < 0)
		error(1, errno, "failed to create epoll");
	for (i = 0; i < p.recv_socks; ++i) {
		ev.events = EPOLLIN;
		ev.data.fd = fd[i];
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
			error(1, errno, "failed to register sock %d epoll", i);
	}

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &recv_io;
	msg.msg_iovlen = 1;

	for (data = 0; data < p.recv_socks * 2; ++data) {
		sport = p.send_port_min + data;
		ndata = htonl(data);
		memcpy(send_buf, &ndata, sizeof(ndata));
		send_from(p, sport, send_buf, sizeof(ndata));

		i = epoll_wait(epfd, &ev, 1, -1);
		if (i < 0)
			error(1, errno, "epoll wait failed");

		if (p.protocol == SOCK_STREAM) {
			conn = accept(ev.data.fd, NULL, NULL);
			if (conn < 0)
				error(1, errno, "error accepting");
			i = recvmsg(conn, &msg, 0);
			close(conn);
		} else {
			i = recvmsg(ev.data.fd, &msg, 0);
		}
		if (i < 0)
			error(1, errno, "recvmsg error");
		if (i != sizeof(ndata))
			/* %zd takes a signed size; cast to match it. */
			error(1, 0, "expected size %zd got %d",
			      (ssize_t)sizeof(ndata), i);

		/* Translate the readable descriptor back into its index. */
		for (i = 0; i < p.recv_socks; ++i)
			if (ev.data.fd == fd[i])
				break;
		memcpy(&ndata, recv_buf, sizeof(ndata));
		/* ntohl() yields uint32_t; cast to match the %d conversion. */
		fprintf(stderr, "Socket %d: %d\n", i, (int)ntohl(ndata));

		expected = (sport % mod);
		if (i != expected)
			error(1, 0, "expected socket %d", expected);
	}

	/* Release the epoll instance; it was previously leaked on each call. */
	close(epfd);
}
272*4882a593Smuzhiyun 
test_reuseport_ebpf(struct test_params p)273*4882a593Smuzhiyun static void test_reuseport_ebpf(struct test_params p)
274*4882a593Smuzhiyun {
275*4882a593Smuzhiyun 	int i, fd[p.recv_socks];
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 	fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
278*4882a593Smuzhiyun 	build_recv_group(p, fd, p.recv_socks, attach_ebpf);
279*4882a593Smuzhiyun 	test_recv_order(p, fd, p.recv_socks);
280*4882a593Smuzhiyun 
281*4882a593Smuzhiyun 	p.send_port_min += p.recv_socks * 2;
282*4882a593Smuzhiyun 	fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
283*4882a593Smuzhiyun 	attach_ebpf(fd[0], p.recv_socks / 2);
284*4882a593Smuzhiyun 	test_recv_order(p, fd, p.recv_socks / 2);
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun 	for (i = 0; i < p.recv_socks; ++i)
287*4882a593Smuzhiyun 		close(fd[i]);
288*4882a593Smuzhiyun }
289*4882a593Smuzhiyun 
test_reuseport_cbpf(struct test_params p)290*4882a593Smuzhiyun static void test_reuseport_cbpf(struct test_params p)
291*4882a593Smuzhiyun {
292*4882a593Smuzhiyun 	int i, fd[p.recv_socks];
293*4882a593Smuzhiyun 
294*4882a593Smuzhiyun 	fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
295*4882a593Smuzhiyun 	build_recv_group(p, fd, p.recv_socks, attach_cbpf);
296*4882a593Smuzhiyun 	test_recv_order(p, fd, p.recv_socks);
297*4882a593Smuzhiyun 
298*4882a593Smuzhiyun 	p.send_port_min += p.recv_socks * 2;
299*4882a593Smuzhiyun 	fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
300*4882a593Smuzhiyun 	attach_cbpf(fd[0], p.recv_socks / 2);
301*4882a593Smuzhiyun 	test_recv_order(p, fd, p.recv_socks / 2);
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 	for (i = 0; i < p.recv_socks; ++i)
304*4882a593Smuzhiyun 		close(fd[i]);
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun 
test_extra_filter(const struct test_params p)307*4882a593Smuzhiyun static void test_extra_filter(const struct test_params p)
308*4882a593Smuzhiyun {
309*4882a593Smuzhiyun 	struct sockaddr * const addr =
310*4882a593Smuzhiyun 		new_any_sockaddr(p.recv_family, p.recv_port);
311*4882a593Smuzhiyun 	int fd1, fd2, opt;
312*4882a593Smuzhiyun 
313*4882a593Smuzhiyun 	fprintf(stderr, "Testing too many filters...\n");
314*4882a593Smuzhiyun 	fd1 = socket(p.recv_family, p.protocol, 0);
315*4882a593Smuzhiyun 	if (fd1 < 0)
316*4882a593Smuzhiyun 		error(1, errno, "failed to create socket 1");
317*4882a593Smuzhiyun 	fd2 = socket(p.recv_family, p.protocol, 0);
318*4882a593Smuzhiyun 	if (fd2 < 0)
319*4882a593Smuzhiyun 		error(1, errno, "failed to create socket 2");
320*4882a593Smuzhiyun 
321*4882a593Smuzhiyun 	opt = 1;
322*4882a593Smuzhiyun 	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
323*4882a593Smuzhiyun 		error(1, errno, "failed to set SO_REUSEPORT on socket 1");
324*4882a593Smuzhiyun 	if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
325*4882a593Smuzhiyun 		error(1, errno, "failed to set SO_REUSEPORT on socket 2");
326*4882a593Smuzhiyun 
327*4882a593Smuzhiyun 	attach_ebpf(fd1, 10);
328*4882a593Smuzhiyun 	attach_ebpf(fd2, 10);
329*4882a593Smuzhiyun 
330*4882a593Smuzhiyun 	if (bind(fd1, addr, sockaddr_size()))
331*4882a593Smuzhiyun 		error(1, errno, "failed to bind recv socket 1");
332*4882a593Smuzhiyun 
333*4882a593Smuzhiyun 	if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
334*4882a593Smuzhiyun 		error(1, errno, "bind socket 2 should fail with EADDRINUSE");
335*4882a593Smuzhiyun 
336*4882a593Smuzhiyun 	free(addr);
337*4882a593Smuzhiyun }
338*4882a593Smuzhiyun 
test_filter_no_reuseport(const struct test_params p)339*4882a593Smuzhiyun static void test_filter_no_reuseport(const struct test_params p)
340*4882a593Smuzhiyun {
341*4882a593Smuzhiyun 	struct sockaddr * const addr =
342*4882a593Smuzhiyun 		new_any_sockaddr(p.recv_family, p.recv_port);
343*4882a593Smuzhiyun 	const char bpf_license[] = "GPL";
344*4882a593Smuzhiyun 	struct bpf_insn ecode[] = {
345*4882a593Smuzhiyun 		{ BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
346*4882a593Smuzhiyun 		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
347*4882a593Smuzhiyun 	};
348*4882a593Smuzhiyun 	struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
349*4882a593Smuzhiyun 	union bpf_attr eprog;
350*4882a593Smuzhiyun 	struct sock_fprog cprog;
351*4882a593Smuzhiyun 	int fd, bpf_fd;
352*4882a593Smuzhiyun 
353*4882a593Smuzhiyun 	fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
354*4882a593Smuzhiyun 
355*4882a593Smuzhiyun 	memset(&eprog, 0, sizeof(eprog));
356*4882a593Smuzhiyun 	eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
357*4882a593Smuzhiyun 	eprog.insn_cnt = ARRAY_SIZE(ecode);
358*4882a593Smuzhiyun 	eprog.insns = (unsigned long) &ecode;
359*4882a593Smuzhiyun 	eprog.license = (unsigned long) &bpf_license;
360*4882a593Smuzhiyun 	eprog.kern_version = 0;
361*4882a593Smuzhiyun 
362*4882a593Smuzhiyun 	memset(&cprog, 0, sizeof(cprog));
363*4882a593Smuzhiyun 	cprog.len = ARRAY_SIZE(ccode);
364*4882a593Smuzhiyun 	cprog.filter = ccode;
365*4882a593Smuzhiyun 
366*4882a593Smuzhiyun 
367*4882a593Smuzhiyun 	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
368*4882a593Smuzhiyun 	if (bpf_fd < 0)
369*4882a593Smuzhiyun 		error(1, errno, "ebpf error");
370*4882a593Smuzhiyun 	fd = socket(p.recv_family, p.protocol, 0);
371*4882a593Smuzhiyun 	if (fd < 0)
372*4882a593Smuzhiyun 		error(1, errno, "failed to create socket 1");
373*4882a593Smuzhiyun 
374*4882a593Smuzhiyun 	if (bind(fd, addr, sockaddr_size()))
375*4882a593Smuzhiyun 		error(1, errno, "failed to bind recv socket 1");
376*4882a593Smuzhiyun 
377*4882a593Smuzhiyun 	errno = 0;
378*4882a593Smuzhiyun 	if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
379*4882a593Smuzhiyun 			sizeof(bpf_fd)) || errno != EINVAL)
380*4882a593Smuzhiyun 		error(1, errno, "setsockopt should have returned EINVAL");
381*4882a593Smuzhiyun 
382*4882a593Smuzhiyun 	errno = 0;
383*4882a593Smuzhiyun 	if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
384*4882a593Smuzhiyun 		       sizeof(cprog)) || errno != EINVAL)
385*4882a593Smuzhiyun 		error(1, errno, "setsockopt should have returned EINVAL");
386*4882a593Smuzhiyun 
387*4882a593Smuzhiyun 	free(addr);
388*4882a593Smuzhiyun }
389*4882a593Smuzhiyun 
test_filter_without_bind(void)390*4882a593Smuzhiyun static void test_filter_without_bind(void)
391*4882a593Smuzhiyun {
392*4882a593Smuzhiyun 	int fd1, fd2, opt = 1;
393*4882a593Smuzhiyun 
394*4882a593Smuzhiyun 	fprintf(stderr, "Testing filter add without bind...\n");
395*4882a593Smuzhiyun 	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
396*4882a593Smuzhiyun 	if (fd1 < 0)
397*4882a593Smuzhiyun 		error(1, errno, "failed to create socket 1");
398*4882a593Smuzhiyun 	fd2 = socket(AF_INET, SOCK_DGRAM, 0);
399*4882a593Smuzhiyun 	if (fd2 < 0)
400*4882a593Smuzhiyun 		error(1, errno, "failed to create socket 2");
401*4882a593Smuzhiyun 	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
402*4882a593Smuzhiyun 		error(1, errno, "failed to set SO_REUSEPORT on socket 1");
403*4882a593Smuzhiyun 	if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
404*4882a593Smuzhiyun 		error(1, errno, "failed to set SO_REUSEPORT on socket 2");
405*4882a593Smuzhiyun 
406*4882a593Smuzhiyun 	attach_ebpf(fd1, 10);
407*4882a593Smuzhiyun 	attach_cbpf(fd2, 10);
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	close(fd1);
410*4882a593Smuzhiyun 	close(fd2);
411*4882a593Smuzhiyun }
412*4882a593Smuzhiyun 
/*
 * Make sure both low bits of /proc/sys/net/ipv4/tcp_fastopen are set.
 *
 * The current value is read first; the file is only reopened for writing
 * when at least one of the two bits is missing, in which case the value is
 * written back with both bits OR-ed in.
 *
 * Any open/read/write failure terminates the program via error().
 */
void enable_fastopen(void)
{
	int fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDONLY);
	int rw_mask = 3;  /* bit 1: client side; bit-2 server side */
	int val, size;
	ssize_t nread;
	char buf[16];

	if (fd < 0)
		error(1, errno, "Unable to open tcp_fastopen sysctl");
	/* Leave room for the terminator that atoi() depends on. */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread <= 0)
		error(1, errno, "Unable to read tcp_fastopen sysctl");
	buf[nread] = '\0';
	val = atoi(buf);
	close(fd);

	if ((val & rw_mask) != rw_mask) {
		fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
		if (fd < 0)
			error(1, errno,
			      "Unable to open tcp_fastopen sysctl for writing");
		val |= rw_mask;
		size = snprintf(buf, sizeof(buf), "%d", val);
		if (write(fd, buf, size) <= 0)
			error(1, errno, "Unable to write tcp_fastopen sysctl");
		close(fd);
	}
}
439*4882a593Smuzhiyun 
/* RLIMIT_MEMLOCK as it was at startup; only meaningful if rlim_old_valid. */
static struct rlimit rlim_old;
/* Non-zero once rlim_old has been filled in by a successful getrlimit(). */
static int rlim_old_valid;

/*
 * Runs before main(): remember the current RLIMIT_MEMLOCK and try to raise
 * it by 1 MiB.  Nothing is changed when the soft limit is already
 * unlimited.  The setrlimit() call is best effort; its result is ignored.
 */
static  __attribute__((constructor)) void main_ctor(void)
{
	struct rlimit rlim_new;

	/* Without a valid snapshot there is nothing to raise or restore. */
	if (getrlimit(RLIMIT_MEMLOCK, &rlim_old))
		return;
	rlim_old_valid = 1;

	if (rlim_old.rlim_cur == RLIM_INFINITY)
		return;

	rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
	/*
	 * An unlimited hard limit must stay as it is: adding to
	 * RLIM_INFINITY would wrap around to a small value below the new
	 * soft limit and make setrlimit() reject the request.
	 */
	rlim_new.rlim_max = rlim_old.rlim_max;
	if (rlim_new.rlim_max != RLIM_INFINITY)
		rlim_new.rlim_max += (1UL << 20);
	setrlimit(RLIMIT_MEMLOCK, &rlim_new);
}

/*
 * Runs after main(): put RLIMIT_MEMLOCK back to the value saved by
 * main_ctor(), provided that value was actually obtained.
 */
static __attribute__((destructor)) void main_dtor(void)
{
	if (rlim_old_valid)
		setrlimit(RLIMIT_MEMLOCK, &rlim_old);
}
459*4882a593Smuzhiyun 
main(void)460*4882a593Smuzhiyun int main(void)
461*4882a593Smuzhiyun {
462*4882a593Smuzhiyun 	fprintf(stderr, "---- IPv4 UDP ----\n");
463*4882a593Smuzhiyun 	/* NOTE: UDP socket lookups traverse a different code path when there
464*4882a593Smuzhiyun 	 * are > 10 sockets in a group.  Run the bpf test through both paths.
465*4882a593Smuzhiyun 	 */
466*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
467*4882a593Smuzhiyun 		.recv_family = AF_INET,
468*4882a593Smuzhiyun 		.send_family = AF_INET,
469*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
470*4882a593Smuzhiyun 		.recv_socks = 10,
471*4882a593Smuzhiyun 		.recv_port = 8000,
472*4882a593Smuzhiyun 		.send_port_min = 9000});
473*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
474*4882a593Smuzhiyun 		.recv_family = AF_INET,
475*4882a593Smuzhiyun 		.send_family = AF_INET,
476*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
477*4882a593Smuzhiyun 		.recv_socks = 20,
478*4882a593Smuzhiyun 		.recv_port = 8000,
479*4882a593Smuzhiyun 		.send_port_min = 9000});
480*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
481*4882a593Smuzhiyun 		.recv_family = AF_INET,
482*4882a593Smuzhiyun 		.send_family = AF_INET,
483*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
484*4882a593Smuzhiyun 		.recv_socks = 10,
485*4882a593Smuzhiyun 		.recv_port = 8001,
486*4882a593Smuzhiyun 		.send_port_min = 9020});
487*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
488*4882a593Smuzhiyun 		.recv_family = AF_INET,
489*4882a593Smuzhiyun 		.send_family = AF_INET,
490*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
491*4882a593Smuzhiyun 		.recv_socks = 20,
492*4882a593Smuzhiyun 		.recv_port = 8001,
493*4882a593Smuzhiyun 		.send_port_min = 9020});
494*4882a593Smuzhiyun 	test_extra_filter((struct test_params) {
495*4882a593Smuzhiyun 		.recv_family = AF_INET,
496*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
497*4882a593Smuzhiyun 		.recv_port = 8002});
498*4882a593Smuzhiyun 	test_filter_no_reuseport((struct test_params) {
499*4882a593Smuzhiyun 		.recv_family = AF_INET,
500*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
501*4882a593Smuzhiyun 		.recv_port = 8008});
502*4882a593Smuzhiyun 
503*4882a593Smuzhiyun 	fprintf(stderr, "---- IPv6 UDP ----\n");
504*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
505*4882a593Smuzhiyun 		.recv_family = AF_INET6,
506*4882a593Smuzhiyun 		.send_family = AF_INET6,
507*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
508*4882a593Smuzhiyun 		.recv_socks = 10,
509*4882a593Smuzhiyun 		.recv_port = 8003,
510*4882a593Smuzhiyun 		.send_port_min = 9040});
511*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
512*4882a593Smuzhiyun 		.recv_family = AF_INET6,
513*4882a593Smuzhiyun 		.send_family = AF_INET6,
514*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
515*4882a593Smuzhiyun 		.recv_socks = 20,
516*4882a593Smuzhiyun 		.recv_port = 8003,
517*4882a593Smuzhiyun 		.send_port_min = 9040});
518*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
519*4882a593Smuzhiyun 		.recv_family = AF_INET6,
520*4882a593Smuzhiyun 		.send_family = AF_INET6,
521*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
522*4882a593Smuzhiyun 		.recv_socks = 10,
523*4882a593Smuzhiyun 		.recv_port = 8004,
524*4882a593Smuzhiyun 		.send_port_min = 9060});
525*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
526*4882a593Smuzhiyun 		.recv_family = AF_INET6,
527*4882a593Smuzhiyun 		.send_family = AF_INET6,
528*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
529*4882a593Smuzhiyun 		.recv_socks = 20,
530*4882a593Smuzhiyun 		.recv_port = 8004,
531*4882a593Smuzhiyun 		.send_port_min = 9060});
532*4882a593Smuzhiyun 	test_extra_filter((struct test_params) {
533*4882a593Smuzhiyun 		.recv_family = AF_INET6,
534*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
535*4882a593Smuzhiyun 		.recv_port = 8005});
536*4882a593Smuzhiyun 	test_filter_no_reuseport((struct test_params) {
537*4882a593Smuzhiyun 		.recv_family = AF_INET6,
538*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
539*4882a593Smuzhiyun 		.recv_port = 8009});
540*4882a593Smuzhiyun 
541*4882a593Smuzhiyun 	fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
542*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
543*4882a593Smuzhiyun 		.recv_family = AF_INET6,
544*4882a593Smuzhiyun 		.send_family = AF_INET,
545*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
546*4882a593Smuzhiyun 		.recv_socks = 20,
547*4882a593Smuzhiyun 		.recv_port = 8006,
548*4882a593Smuzhiyun 		.send_port_min = 9080});
549*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
550*4882a593Smuzhiyun 		.recv_family = AF_INET6,
551*4882a593Smuzhiyun 		.send_family = AF_INET,
552*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
553*4882a593Smuzhiyun 		.recv_socks = 10,
554*4882a593Smuzhiyun 		.recv_port = 8006,
555*4882a593Smuzhiyun 		.send_port_min = 9080});
556*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
557*4882a593Smuzhiyun 		.recv_family = AF_INET6,
558*4882a593Smuzhiyun 		.send_family = AF_INET,
559*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
560*4882a593Smuzhiyun 		.recv_socks = 10,
561*4882a593Smuzhiyun 		.recv_port = 8007,
562*4882a593Smuzhiyun 		.send_port_min = 9100});
563*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
564*4882a593Smuzhiyun 		.recv_family = AF_INET6,
565*4882a593Smuzhiyun 		.send_family = AF_INET,
566*4882a593Smuzhiyun 		.protocol = SOCK_DGRAM,
567*4882a593Smuzhiyun 		.recv_socks = 20,
568*4882a593Smuzhiyun 		.recv_port = 8007,
569*4882a593Smuzhiyun 		.send_port_min = 9100});
570*4882a593Smuzhiyun 
571*4882a593Smuzhiyun 	/* TCP fastopen is required for the TCP tests */
572*4882a593Smuzhiyun 	enable_fastopen();
573*4882a593Smuzhiyun 	fprintf(stderr, "---- IPv4 TCP ----\n");
574*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
575*4882a593Smuzhiyun 		.recv_family = AF_INET,
576*4882a593Smuzhiyun 		.send_family = AF_INET,
577*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
578*4882a593Smuzhiyun 		.recv_socks = 10,
579*4882a593Smuzhiyun 		.recv_port = 8008,
580*4882a593Smuzhiyun 		.send_port_min = 9120});
581*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
582*4882a593Smuzhiyun 		.recv_family = AF_INET,
583*4882a593Smuzhiyun 		.send_family = AF_INET,
584*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
585*4882a593Smuzhiyun 		.recv_socks = 10,
586*4882a593Smuzhiyun 		.recv_port = 8009,
587*4882a593Smuzhiyun 		.send_port_min = 9160});
588*4882a593Smuzhiyun 	test_extra_filter((struct test_params) {
589*4882a593Smuzhiyun 		.recv_family = AF_INET,
590*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
591*4882a593Smuzhiyun 		.recv_port = 8010});
592*4882a593Smuzhiyun 	test_filter_no_reuseport((struct test_params) {
593*4882a593Smuzhiyun 		.recv_family = AF_INET,
594*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
595*4882a593Smuzhiyun 		.recv_port = 8011});
596*4882a593Smuzhiyun 
597*4882a593Smuzhiyun 	fprintf(stderr, "---- IPv6 TCP ----\n");
598*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
599*4882a593Smuzhiyun 		.recv_family = AF_INET6,
600*4882a593Smuzhiyun 		.send_family = AF_INET6,
601*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
602*4882a593Smuzhiyun 		.recv_socks = 10,
603*4882a593Smuzhiyun 		.recv_port = 8012,
604*4882a593Smuzhiyun 		.send_port_min = 9200});
605*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
606*4882a593Smuzhiyun 		.recv_family = AF_INET6,
607*4882a593Smuzhiyun 		.send_family = AF_INET6,
608*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
609*4882a593Smuzhiyun 		.recv_socks = 10,
610*4882a593Smuzhiyun 		.recv_port = 8013,
611*4882a593Smuzhiyun 		.send_port_min = 9240});
612*4882a593Smuzhiyun 	test_extra_filter((struct test_params) {
613*4882a593Smuzhiyun 		.recv_family = AF_INET6,
614*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
615*4882a593Smuzhiyun 		.recv_port = 8014});
616*4882a593Smuzhiyun 	test_filter_no_reuseport((struct test_params) {
617*4882a593Smuzhiyun 		.recv_family = AF_INET6,
618*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
619*4882a593Smuzhiyun 		.recv_port = 8015});
620*4882a593Smuzhiyun 
621*4882a593Smuzhiyun 	fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
622*4882a593Smuzhiyun 	test_reuseport_ebpf((struct test_params) {
623*4882a593Smuzhiyun 		.recv_family = AF_INET6,
624*4882a593Smuzhiyun 		.send_family = AF_INET,
625*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
626*4882a593Smuzhiyun 		.recv_socks = 10,
627*4882a593Smuzhiyun 		.recv_port = 8016,
628*4882a593Smuzhiyun 		.send_port_min = 9320});
629*4882a593Smuzhiyun 	test_reuseport_cbpf((struct test_params) {
630*4882a593Smuzhiyun 		.recv_family = AF_INET6,
631*4882a593Smuzhiyun 		.send_family = AF_INET,
632*4882a593Smuzhiyun 		.protocol = SOCK_STREAM,
633*4882a593Smuzhiyun 		.recv_socks = 10,
634*4882a593Smuzhiyun 		.recv_port = 8017,
635*4882a593Smuzhiyun 		.send_port_min = 9360});
636*4882a593Smuzhiyun 
637*4882a593Smuzhiyun 	test_filter_without_bind();
638*4882a593Smuzhiyun 
639*4882a593Smuzhiyun 	fprintf(stderr, "SUCCESS\n");
640*4882a593Smuzhiyun 	return 0;
641*4882a593Smuzhiyun }
642