1*4882a593Smuzhiyun /* Copyright (c) 2017 Facebook
2*4882a593Smuzhiyun *
3*4882a593Smuzhiyun * This program is free software; you can redistribute it and/or
4*4882a593Smuzhiyun * modify it under the terms of version 2 of the GNU General Public
5*4882a593Smuzhiyun * License as published by the Free Software Foundation.
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * BPF program to set congestion control to dctcp when both hosts are
8*4882a593Smuzhiyun * in the same datacenter (as determined by IPv6 prefix).
9*4882a593Smuzhiyun *
10*4882a593Smuzhiyun * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
11*4882a593Smuzhiyun */
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun #include <uapi/linux/bpf.h>
14*4882a593Smuzhiyun #include <uapi/linux/tcp.h>
15*4882a593Smuzhiyun #include <uapi/linux/if_ether.h>
16*4882a593Smuzhiyun #include <uapi/linux/if_packet.h>
17*4882a593Smuzhiyun #include <uapi/linux/ip.h>
18*4882a593Smuzhiyun #include <linux/socket.h>
19*4882a593Smuzhiyun #include <bpf/bpf_helpers.h>
20*4882a593Smuzhiyun #include <bpf/bpf_endian.h>
21*4882a593Smuzhiyun
/* When DEBUG is defined, the bpf_printk() trace calls guarded by
 * #ifdef DEBUG in this file are compiled in.
 */
22*4882a593Smuzhiyun #define DEBUG 1
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun SEC("sockops")
bpf_cong(struct bpf_sock_ops * skops)25*4882a593Smuzhiyun int bpf_cong(struct bpf_sock_ops *skops)
26*4882a593Smuzhiyun {
27*4882a593Smuzhiyun char cong[] = "dctcp";
28*4882a593Smuzhiyun int rv = 0;
29*4882a593Smuzhiyun int op;
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun /* For testing purposes, only execute rest of BPF program
32*4882a593Smuzhiyun * if neither port numberis 55601
33*4882a593Smuzhiyun */
34*4882a593Smuzhiyun if (bpf_ntohl(skops->remote_port) != 55601 &&
35*4882a593Smuzhiyun skops->local_port != 55601) {
36*4882a593Smuzhiyun skops->reply = -1;
37*4882a593Smuzhiyun return 1;
38*4882a593Smuzhiyun }
39*4882a593Smuzhiyun
40*4882a593Smuzhiyun op = (int) skops->op;
41*4882a593Smuzhiyun
42*4882a593Smuzhiyun #ifdef DEBUG
43*4882a593Smuzhiyun bpf_printk("BPF command: %d\n", op);
44*4882a593Smuzhiyun #endif
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun /* Check if both hosts are in the same datacenter. For this
47*4882a593Smuzhiyun * example they are if the 1st 5.5 bytes in the IPv6 address
48*4882a593Smuzhiyun * are the same.
49*4882a593Smuzhiyun */
50*4882a593Smuzhiyun if (skops->family == AF_INET6 &&
51*4882a593Smuzhiyun skops->local_ip6[0] == skops->remote_ip6[0] &&
52*4882a593Smuzhiyun (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
53*4882a593Smuzhiyun (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
54*4882a593Smuzhiyun switch (op) {
55*4882a593Smuzhiyun case BPF_SOCK_OPS_NEEDS_ECN:
56*4882a593Smuzhiyun rv = 1;
57*4882a593Smuzhiyun break;
58*4882a593Smuzhiyun case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
59*4882a593Smuzhiyun rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
60*4882a593Smuzhiyun cong, sizeof(cong));
61*4882a593Smuzhiyun break;
62*4882a593Smuzhiyun case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
63*4882a593Smuzhiyun rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
64*4882a593Smuzhiyun cong, sizeof(cong));
65*4882a593Smuzhiyun break;
66*4882a593Smuzhiyun default:
67*4882a593Smuzhiyun rv = -1;
68*4882a593Smuzhiyun }
69*4882a593Smuzhiyun } else {
70*4882a593Smuzhiyun rv = -1;
71*4882a593Smuzhiyun }
72*4882a593Smuzhiyun #ifdef DEBUG
73*4882a593Smuzhiyun bpf_printk("Returning %d\n", rv);
74*4882a593Smuzhiyun #endif
75*4882a593Smuzhiyun skops->reply = rv;
76*4882a593Smuzhiyun return 1;
77*4882a593Smuzhiyun }
/* License declaration for this BPF object, tagged with SEC("license");
 * the declared license is "GPL".
 */
78*4882a593Smuzhiyun char _license[] SEC("license") = "GPL";
79