// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2018 Google Inc.
 * Author: Eric Dumazet (edumazet@google.com)
 *
 * Reference program demonstrating tcp mmap() usage,
 * and SO_RCVLOWAT hints for receiver.
 *
 * Note : NIC with header split is needed to use mmap() on TCP :
 * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
 *
 * How to use on loopback interface :
 *
 *  ifconfig lo mtu 61512  # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
 *  tcp_mmap -s -z &
 *  tcp_mmap -H ::1 -z
 *
 *  Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
 *      (4096 : page size on x86, 12: TCP TS option length)
 *  tcp_mmap -s -z -M $((4096+12)) &
 *  tcp_mmap -H ::1 -z -M $((4096+12))
 *
 * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
 *       We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
 *
 * $ ./tcp_mmap -s &                                 # Without mmap()
 * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
 * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
 *   cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
 * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
 *   cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
 * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
 *   cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
 * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
 *   cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
 * $ kill %1   # kill tcp_mmap server
 *
 * $ ./tcp_mmap -s -z &                              # With mmap()
 * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
 * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
 *   cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
 * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
 *   cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
 * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
 *   cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
 * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
 *   cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/types.h>
#include <fcntl.h>
#include <error.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <limits.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <poll.h>
#include <linux/tcp.h>
#include <assert.h>
69*4882a593Smuzhiyun
/* Fallback definition for libc headers that do not provide MSG_ZEROCOPY. */
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY    0x4000000
#endif

/* Number of bytes the sender pushes before closing the connection (1 << 35). */
#define FILE_SZ (1ULL << 35)

/* Address family (-4 / -6), matching sockaddr length, and TCP port (-p). */
static int cfg_family		= AF_INET6;
static socklen_t cfg_alen	= sizeof(struct sockaddr_in6);
static int cfg_port		= 8787;

/* Socket buffer sizes. Default: autotuning. Set with -r <integer> / -w <integer>. */
static int rcvbuf;
static int sndbuf;

/* -z option: zero copy (MSG_ZEROCOPY for the sender, mmap() for the receiver). */
static int zflg;

/* -x option: hash received data (simple xor accumulated in htotal). */
static int xflg;

/*
 * -k option: receiver shall keep all received file in memory (no munmap() calls).
 * NOTE(review): the flag is set by main() but is not read anywhere in this file.
 */
static int keepflag;

/* Size in bytes of each send()/receive chunk (-C option). */
static size_t chunk_size = 512*1024;

/* Alignment used for the receive mappings (-a option); main() picks a default when 0. */
static size_t map_align;

/* Running xor maintained by hash_zone(). */
unsigned long htotal;
90*4882a593Smuzhiyun
/*
 * Issue a prefetcht0 instruction for the memory at @x.
 *
 * Only implemented for x86_64; on every other architecture this compiles
 * to nothing. The pointed-to memory is never read or written by the C code.
 */
static inline void prefetch(const void *x)
{
#if defined(__x86_64__)
	/*
	 * Use the __asm__ spelling: the plain "asm" keyword is not available
	 * when compiling in strict ISO mode (e.g. -std=c11).
	 */
	__asm__ __volatile__("prefetcht0 %P0" : : "m" (*(const char *)x));
#else
	(void)x;
#endif
}
97*4882a593Smuzhiyun
hash_zone(void * zone,unsigned int length)98*4882a593Smuzhiyun void hash_zone(void *zone, unsigned int length)
99*4882a593Smuzhiyun {
100*4882a593Smuzhiyun unsigned long temp = htotal;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun while (length >= 8*sizeof(long)) {
103*4882a593Smuzhiyun prefetch(zone + 384);
104*4882a593Smuzhiyun temp ^= *(unsigned long *)zone;
105*4882a593Smuzhiyun temp ^= *(unsigned long *)(zone + sizeof(long));
106*4882a593Smuzhiyun temp ^= *(unsigned long *)(zone + 2*sizeof(long));
107*4882a593Smuzhiyun temp ^= *(unsigned long *)(zone + 3*sizeof(long));
108*4882a593Smuzhiyun temp ^= *(unsigned long *)(zone + 4*sizeof(long));
109*4882a593Smuzhiyun temp ^= *(unsigned long *)(zone + 5*sizeof(long));
110*4882a593Smuzhiyun temp ^= *(unsigned long *)(zone + 6*sizeof(long));
111*4882a593Smuzhiyun temp ^= *(unsigned long *)(zone + 7*sizeof(long));
112*4882a593Smuzhiyun zone += 8*sizeof(long);
113*4882a593Smuzhiyun length -= 8*sizeof(long);
114*4882a593Smuzhiyun }
115*4882a593Smuzhiyun while (length >= 1) {
116*4882a593Smuzhiyun temp ^= *(unsigned char *)zone;
117*4882a593Smuzhiyun zone += 1;
118*4882a593Smuzhiyun length--;
119*4882a593Smuzhiyun }
120*4882a593Smuzhiyun htotal = temp;
121*4882a593Smuzhiyun }
122*4882a593Smuzhiyun
/*
 * Round x up to the next multiple of align_to.
 * align_to must be a power of two and is evaluated twice.
 */
#define ALIGN_UP(x, align_to)	(((x) + ((align_to)-1)) & ~((align_to)-1))

/*
 * Same rounding applied to a pointer; the result has the type of p.
 * __typeof__ is used instead of typeof so the macro also works when the
 * compiler is in strict ISO mode (e.g. -std=c11).
 */
#define ALIGN_PTR_UP(p, ptr_align_to)	((__typeof__(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun
mmap_large_buffer(size_t need,size_t * allocated)127*4882a593Smuzhiyun static void *mmap_large_buffer(size_t need, size_t *allocated)
128*4882a593Smuzhiyun {
129*4882a593Smuzhiyun void *buffer;
130*4882a593Smuzhiyun size_t sz;
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun /* Attempt to use huge pages if possible. */
133*4882a593Smuzhiyun sz = ALIGN_UP(need, map_align);
134*4882a593Smuzhiyun buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
135*4882a593Smuzhiyun MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun if (buffer == (void *)-1) {
138*4882a593Smuzhiyun sz = need;
139*4882a593Smuzhiyun buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
140*4882a593Smuzhiyun MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
141*4882a593Smuzhiyun if (buffer != (void *)-1)
142*4882a593Smuzhiyun fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
143*4882a593Smuzhiyun }
144*4882a593Smuzhiyun *allocated = sz;
145*4882a593Smuzhiyun return buffer;
146*4882a593Smuzhiyun }
147*4882a593Smuzhiyun
child_thread(void * arg)148*4882a593Smuzhiyun void *child_thread(void *arg)
149*4882a593Smuzhiyun {
150*4882a593Smuzhiyun unsigned long total_mmap = 0, total = 0;
151*4882a593Smuzhiyun struct tcp_zerocopy_receive zc;
152*4882a593Smuzhiyun unsigned long delta_usec;
153*4882a593Smuzhiyun int flags = MAP_SHARED;
154*4882a593Smuzhiyun struct timeval t0, t1;
155*4882a593Smuzhiyun char *buffer = NULL;
156*4882a593Smuzhiyun void *raddr = NULL;
157*4882a593Smuzhiyun void *addr = NULL;
158*4882a593Smuzhiyun double throughput;
159*4882a593Smuzhiyun struct rusage ru;
160*4882a593Smuzhiyun size_t buffer_sz;
161*4882a593Smuzhiyun int lu, fd;
162*4882a593Smuzhiyun
163*4882a593Smuzhiyun fd = (int)(unsigned long)arg;
164*4882a593Smuzhiyun
165*4882a593Smuzhiyun gettimeofday(&t0, NULL);
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun fcntl(fd, F_SETFL, O_NDELAY);
168*4882a593Smuzhiyun buffer = mmap_large_buffer(chunk_size, &buffer_sz);
169*4882a593Smuzhiyun if (buffer == (void *)-1) {
170*4882a593Smuzhiyun perror("mmap");
171*4882a593Smuzhiyun goto error;
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun if (zflg) {
174*4882a593Smuzhiyun raddr = mmap(NULL, chunk_size + map_align, PROT_READ, flags, fd, 0);
175*4882a593Smuzhiyun if (raddr == (void *)-1) {
176*4882a593Smuzhiyun perror("mmap");
177*4882a593Smuzhiyun zflg = 0;
178*4882a593Smuzhiyun } else {
179*4882a593Smuzhiyun addr = ALIGN_PTR_UP(raddr, map_align);
180*4882a593Smuzhiyun }
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun while (1) {
183*4882a593Smuzhiyun struct pollfd pfd = { .fd = fd, .events = POLLIN, };
184*4882a593Smuzhiyun int sub;
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun poll(&pfd, 1, 10000);
187*4882a593Smuzhiyun if (zflg) {
188*4882a593Smuzhiyun socklen_t zc_len = sizeof(zc);
189*4882a593Smuzhiyun int res;
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun memset(&zc, 0, sizeof(zc));
192*4882a593Smuzhiyun zc.address = (__u64)((unsigned long)addr);
193*4882a593Smuzhiyun zc.length = chunk_size;
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
196*4882a593Smuzhiyun &zc, &zc_len);
197*4882a593Smuzhiyun if (res == -1)
198*4882a593Smuzhiyun break;
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun if (zc.length) {
201*4882a593Smuzhiyun assert(zc.length <= chunk_size);
202*4882a593Smuzhiyun total_mmap += zc.length;
203*4882a593Smuzhiyun if (xflg)
204*4882a593Smuzhiyun hash_zone(addr, zc.length);
205*4882a593Smuzhiyun /* It is more efficient to unmap the pages right now,
206*4882a593Smuzhiyun * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
207*4882a593Smuzhiyun */
208*4882a593Smuzhiyun madvise(addr, zc.length, MADV_DONTNEED);
209*4882a593Smuzhiyun total += zc.length;
210*4882a593Smuzhiyun }
211*4882a593Smuzhiyun if (zc.recv_skip_hint) {
212*4882a593Smuzhiyun assert(zc.recv_skip_hint <= chunk_size);
213*4882a593Smuzhiyun lu = read(fd, buffer, zc.recv_skip_hint);
214*4882a593Smuzhiyun if (lu > 0) {
215*4882a593Smuzhiyun if (xflg)
216*4882a593Smuzhiyun hash_zone(buffer, lu);
217*4882a593Smuzhiyun total += lu;
218*4882a593Smuzhiyun }
219*4882a593Smuzhiyun }
220*4882a593Smuzhiyun continue;
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun sub = 0;
223*4882a593Smuzhiyun while (sub < chunk_size) {
224*4882a593Smuzhiyun lu = read(fd, buffer + sub, chunk_size - sub);
225*4882a593Smuzhiyun if (lu == 0)
226*4882a593Smuzhiyun goto end;
227*4882a593Smuzhiyun if (lu < 0)
228*4882a593Smuzhiyun break;
229*4882a593Smuzhiyun if (xflg)
230*4882a593Smuzhiyun hash_zone(buffer + sub, lu);
231*4882a593Smuzhiyun total += lu;
232*4882a593Smuzhiyun sub += lu;
233*4882a593Smuzhiyun }
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun end:
236*4882a593Smuzhiyun gettimeofday(&t1, NULL);
237*4882a593Smuzhiyun delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun throughput = 0;
240*4882a593Smuzhiyun if (delta_usec)
241*4882a593Smuzhiyun throughput = total * 8.0 / (double)delta_usec / 1000.0;
242*4882a593Smuzhiyun getrusage(RUSAGE_THREAD, &ru);
243*4882a593Smuzhiyun if (total > 1024*1024) {
244*4882a593Smuzhiyun unsigned long total_usec;
245*4882a593Smuzhiyun unsigned long mb = total >> 20;
246*4882a593Smuzhiyun total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
247*4882a593Smuzhiyun 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
248*4882a593Smuzhiyun printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
249*4882a593Smuzhiyun " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
250*4882a593Smuzhiyun total / (1024.0 * 1024.0),
251*4882a593Smuzhiyun 100.0*total_mmap/total,
252*4882a593Smuzhiyun (double)delta_usec / 1000000.0,
253*4882a593Smuzhiyun throughput,
254*4882a593Smuzhiyun (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
255*4882a593Smuzhiyun (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
256*4882a593Smuzhiyun (double)total_usec/mb,
257*4882a593Smuzhiyun ru.ru_nvcsw);
258*4882a593Smuzhiyun }
259*4882a593Smuzhiyun error:
260*4882a593Smuzhiyun munmap(buffer, buffer_sz);
261*4882a593Smuzhiyun close(fd);
262*4882a593Smuzhiyun if (zflg)
263*4882a593Smuzhiyun munmap(raddr, chunk_size + map_align);
264*4882a593Smuzhiyun pthread_exit(0);
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun
apply_rcvsnd_buf(int fd)267*4882a593Smuzhiyun static void apply_rcvsnd_buf(int fd)
268*4882a593Smuzhiyun {
269*4882a593Smuzhiyun if (rcvbuf && setsockopt(fd, SOL_SOCKET,
270*4882a593Smuzhiyun SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
271*4882a593Smuzhiyun perror("setsockopt SO_RCVBUF");
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun if (sndbuf && setsockopt(fd, SOL_SOCKET,
275*4882a593Smuzhiyun SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
276*4882a593Smuzhiyun perror("setsockopt SO_SNDBUF");
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun }
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun
setup_sockaddr(int domain,const char * str_addr,struct sockaddr_storage * sockaddr)281*4882a593Smuzhiyun static void setup_sockaddr(int domain, const char *str_addr,
282*4882a593Smuzhiyun struct sockaddr_storage *sockaddr)
283*4882a593Smuzhiyun {
284*4882a593Smuzhiyun struct sockaddr_in6 *addr6 = (void *) sockaddr;
285*4882a593Smuzhiyun struct sockaddr_in *addr4 = (void *) sockaddr;
286*4882a593Smuzhiyun
287*4882a593Smuzhiyun switch (domain) {
288*4882a593Smuzhiyun case PF_INET:
289*4882a593Smuzhiyun memset(addr4, 0, sizeof(*addr4));
290*4882a593Smuzhiyun addr4->sin_family = AF_INET;
291*4882a593Smuzhiyun addr4->sin_port = htons(cfg_port);
292*4882a593Smuzhiyun if (str_addr &&
293*4882a593Smuzhiyun inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
294*4882a593Smuzhiyun error(1, 0, "ipv4 parse error: %s", str_addr);
295*4882a593Smuzhiyun break;
296*4882a593Smuzhiyun case PF_INET6:
297*4882a593Smuzhiyun memset(addr6, 0, sizeof(*addr6));
298*4882a593Smuzhiyun addr6->sin6_family = AF_INET6;
299*4882a593Smuzhiyun addr6->sin6_port = htons(cfg_port);
300*4882a593Smuzhiyun if (str_addr &&
301*4882a593Smuzhiyun inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
302*4882a593Smuzhiyun error(1, 0, "ipv6 parse error: %s", str_addr);
303*4882a593Smuzhiyun break;
304*4882a593Smuzhiyun default:
305*4882a593Smuzhiyun error(1, 0, "illegal domain");
306*4882a593Smuzhiyun }
307*4882a593Smuzhiyun }
308*4882a593Smuzhiyun
do_accept(int fdlisten)309*4882a593Smuzhiyun static void do_accept(int fdlisten)
310*4882a593Smuzhiyun {
311*4882a593Smuzhiyun pthread_attr_t attr;
312*4882a593Smuzhiyun int rcvlowat;
313*4882a593Smuzhiyun
314*4882a593Smuzhiyun pthread_attr_init(&attr);
315*4882a593Smuzhiyun pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun rcvlowat = chunk_size;
318*4882a593Smuzhiyun if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
319*4882a593Smuzhiyun &rcvlowat, sizeof(rcvlowat)) == -1) {
320*4882a593Smuzhiyun perror("setsockopt SO_RCVLOWAT");
321*4882a593Smuzhiyun }
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun apply_rcvsnd_buf(fdlisten);
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun while (1) {
326*4882a593Smuzhiyun struct sockaddr_in addr;
327*4882a593Smuzhiyun socklen_t addrlen = sizeof(addr);
328*4882a593Smuzhiyun pthread_t th;
329*4882a593Smuzhiyun int fd, res;
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
332*4882a593Smuzhiyun if (fd == -1) {
333*4882a593Smuzhiyun perror("accept");
334*4882a593Smuzhiyun continue;
335*4882a593Smuzhiyun }
336*4882a593Smuzhiyun res = pthread_create(&th, &attr, child_thread,
337*4882a593Smuzhiyun (void *)(unsigned long)fd);
338*4882a593Smuzhiyun if (res) {
339*4882a593Smuzhiyun errno = res;
340*4882a593Smuzhiyun perror("pthread_create");
341*4882a593Smuzhiyun close(fd);
342*4882a593Smuzhiyun }
343*4882a593Smuzhiyun }
344*4882a593Smuzhiyun }
345*4882a593Smuzhiyun
/*
 * Return the "Hugepagesize:" value from /proc/meminfo, converted from kB to
 * bytes, or 0 when the file cannot be opened or has no such line.
 *
 * Why it matters: each thread should reserve a big enough vma to avoid
 * spinlock collisions in ptl locks. This size is 2MB on x86_64.
 */
static unsigned long default_huge_page_size(void)
{
	unsigned long kb = 0, bytes = 0;
	char *text = NULL;
	size_t cap = 0;
	FILE *meminfo;

	meminfo = fopen("/proc/meminfo", "r");
	if (meminfo == NULL)
		return 0;

	for (;;) {
		/* getline() grows the buffer as needed; stop at EOF or error. */
		if (getline(&text, &cap, meminfo) <= 0)
			break;
		if (sscanf(text, "Hugepagesize: %lu kB", &kb) != 1)
			continue;
		bytes = kb << 10;
		break;
	}

	free(text);
	fclose(meminfo);
	return bytes;
}
369*4882a593Smuzhiyun
main(int argc,char * argv[])370*4882a593Smuzhiyun int main(int argc, char *argv[])
371*4882a593Smuzhiyun {
372*4882a593Smuzhiyun struct sockaddr_storage listenaddr, addr;
373*4882a593Smuzhiyun unsigned int max_pacing_rate = 0;
374*4882a593Smuzhiyun uint64_t total = 0;
375*4882a593Smuzhiyun char *host = NULL;
376*4882a593Smuzhiyun int fd, c, on = 1;
377*4882a593Smuzhiyun size_t buffer_sz;
378*4882a593Smuzhiyun char *buffer;
379*4882a593Smuzhiyun int sflg = 0;
380*4882a593Smuzhiyun int mss = 0;
381*4882a593Smuzhiyun
382*4882a593Smuzhiyun while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) {
383*4882a593Smuzhiyun switch (c) {
384*4882a593Smuzhiyun case '4':
385*4882a593Smuzhiyun cfg_family = PF_INET;
386*4882a593Smuzhiyun cfg_alen = sizeof(struct sockaddr_in);
387*4882a593Smuzhiyun break;
388*4882a593Smuzhiyun case '6':
389*4882a593Smuzhiyun cfg_family = PF_INET6;
390*4882a593Smuzhiyun cfg_alen = sizeof(struct sockaddr_in6);
391*4882a593Smuzhiyun break;
392*4882a593Smuzhiyun case 'p':
393*4882a593Smuzhiyun cfg_port = atoi(optarg);
394*4882a593Smuzhiyun break;
395*4882a593Smuzhiyun case 'H':
396*4882a593Smuzhiyun host = optarg;
397*4882a593Smuzhiyun break;
398*4882a593Smuzhiyun case 's': /* server : listen for incoming connections */
399*4882a593Smuzhiyun sflg++;
400*4882a593Smuzhiyun break;
401*4882a593Smuzhiyun case 'r':
402*4882a593Smuzhiyun rcvbuf = atoi(optarg);
403*4882a593Smuzhiyun break;
404*4882a593Smuzhiyun case 'w':
405*4882a593Smuzhiyun sndbuf = atoi(optarg);
406*4882a593Smuzhiyun break;
407*4882a593Smuzhiyun case 'z':
408*4882a593Smuzhiyun zflg = 1;
409*4882a593Smuzhiyun break;
410*4882a593Smuzhiyun case 'M':
411*4882a593Smuzhiyun mss = atoi(optarg);
412*4882a593Smuzhiyun break;
413*4882a593Smuzhiyun case 'x':
414*4882a593Smuzhiyun xflg = 1;
415*4882a593Smuzhiyun break;
416*4882a593Smuzhiyun case 'k':
417*4882a593Smuzhiyun keepflag = 1;
418*4882a593Smuzhiyun break;
419*4882a593Smuzhiyun case 'P':
420*4882a593Smuzhiyun max_pacing_rate = atoi(optarg) ;
421*4882a593Smuzhiyun break;
422*4882a593Smuzhiyun case 'C':
423*4882a593Smuzhiyun chunk_size = atol(optarg);
424*4882a593Smuzhiyun break;
425*4882a593Smuzhiyun case 'a':
426*4882a593Smuzhiyun map_align = atol(optarg);
427*4882a593Smuzhiyun break;
428*4882a593Smuzhiyun default:
429*4882a593Smuzhiyun exit(1);
430*4882a593Smuzhiyun }
431*4882a593Smuzhiyun }
432*4882a593Smuzhiyun if (!map_align) {
433*4882a593Smuzhiyun map_align = default_huge_page_size();
434*4882a593Smuzhiyun /* if really /proc/meminfo is not helping,
435*4882a593Smuzhiyun * we use the default x86_64 hugepagesize.
436*4882a593Smuzhiyun */
437*4882a593Smuzhiyun if (!map_align)
438*4882a593Smuzhiyun map_align = 2*1024*1024;
439*4882a593Smuzhiyun }
440*4882a593Smuzhiyun if (sflg) {
441*4882a593Smuzhiyun int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
442*4882a593Smuzhiyun
443*4882a593Smuzhiyun if (fdlisten == -1) {
444*4882a593Smuzhiyun perror("socket");
445*4882a593Smuzhiyun exit(1);
446*4882a593Smuzhiyun }
447*4882a593Smuzhiyun apply_rcvsnd_buf(fdlisten);
448*4882a593Smuzhiyun setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
449*4882a593Smuzhiyun
450*4882a593Smuzhiyun setup_sockaddr(cfg_family, host, &listenaddr);
451*4882a593Smuzhiyun
452*4882a593Smuzhiyun if (mss &&
453*4882a593Smuzhiyun setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
454*4882a593Smuzhiyun &mss, sizeof(mss)) == -1) {
455*4882a593Smuzhiyun perror("setsockopt TCP_MAXSEG");
456*4882a593Smuzhiyun exit(1);
457*4882a593Smuzhiyun }
458*4882a593Smuzhiyun if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
459*4882a593Smuzhiyun perror("bind");
460*4882a593Smuzhiyun exit(1);
461*4882a593Smuzhiyun }
462*4882a593Smuzhiyun if (listen(fdlisten, 128) == -1) {
463*4882a593Smuzhiyun perror("listen");
464*4882a593Smuzhiyun exit(1);
465*4882a593Smuzhiyun }
466*4882a593Smuzhiyun do_accept(fdlisten);
467*4882a593Smuzhiyun }
468*4882a593Smuzhiyun
469*4882a593Smuzhiyun buffer = mmap_large_buffer(chunk_size, &buffer_sz);
470*4882a593Smuzhiyun if (buffer == (char *)-1) {
471*4882a593Smuzhiyun perror("mmap");
472*4882a593Smuzhiyun exit(1);
473*4882a593Smuzhiyun }
474*4882a593Smuzhiyun
475*4882a593Smuzhiyun fd = socket(cfg_family, SOCK_STREAM, 0);
476*4882a593Smuzhiyun if (fd == -1) {
477*4882a593Smuzhiyun perror("socket");
478*4882a593Smuzhiyun exit(1);
479*4882a593Smuzhiyun }
480*4882a593Smuzhiyun apply_rcvsnd_buf(fd);
481*4882a593Smuzhiyun
482*4882a593Smuzhiyun setup_sockaddr(cfg_family, host, &addr);
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun if (mss &&
485*4882a593Smuzhiyun setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
486*4882a593Smuzhiyun perror("setsockopt TCP_MAXSEG");
487*4882a593Smuzhiyun exit(1);
488*4882a593Smuzhiyun }
489*4882a593Smuzhiyun if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
490*4882a593Smuzhiyun perror("connect");
491*4882a593Smuzhiyun exit(1);
492*4882a593Smuzhiyun }
493*4882a593Smuzhiyun if (max_pacing_rate &&
494*4882a593Smuzhiyun setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
495*4882a593Smuzhiyun &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
496*4882a593Smuzhiyun perror("setsockopt SO_MAX_PACING_RATE");
497*4882a593Smuzhiyun
498*4882a593Smuzhiyun if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
499*4882a593Smuzhiyun &on, sizeof(on)) == -1) {
500*4882a593Smuzhiyun perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
501*4882a593Smuzhiyun zflg = 0;
502*4882a593Smuzhiyun }
503*4882a593Smuzhiyun while (total < FILE_SZ) {
504*4882a593Smuzhiyun int64_t wr = FILE_SZ - total;
505*4882a593Smuzhiyun
506*4882a593Smuzhiyun if (wr > chunk_size)
507*4882a593Smuzhiyun wr = chunk_size;
508*4882a593Smuzhiyun /* Note : we just want to fill the pipe with 0 bytes */
509*4882a593Smuzhiyun wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
510*4882a593Smuzhiyun if (wr <= 0)
511*4882a593Smuzhiyun break;
512*4882a593Smuzhiyun total += wr;
513*4882a593Smuzhiyun }
514*4882a593Smuzhiyun close(fd);
515*4882a593Smuzhiyun munmap(buffer, buffer_sz);
516*4882a593Smuzhiyun return 0;
517*4882a593Smuzhiyun }
518