// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
13*4882a593Smuzhiyun #define KBUILD_MODNAME "foo"
14*4882a593Smuzhiyun #include <uapi/linux/bpf.h>
15*4882a593Smuzhiyun #include <linux/in.h>
16*4882a593Smuzhiyun #include <linux/if_ether.h>
17*4882a593Smuzhiyun #include <linux/if_packet.h>
18*4882a593Smuzhiyun #include <linux/if_vlan.h>
19*4882a593Smuzhiyun #include <linux/ip.h>
20*4882a593Smuzhiyun #include <linux/ipv6.h>
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun #include <bpf/bpf_helpers.h>
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun struct {
27*4882a593Smuzhiyun __uint(type, BPF_MAP_TYPE_DEVMAP);
28*4882a593Smuzhiyun __uint(key_size, sizeof(int));
29*4882a593Smuzhiyun __uint(value_size, sizeof(int));
30*4882a593Smuzhiyun __uint(max_entries, 64);
31*4882a593Smuzhiyun } xdp_tx_ports SEC(".maps");
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun /* from include/net/ip.h */
ip_decrease_ttl(struct iphdr * iph)34*4882a593Smuzhiyun static __always_inline int ip_decrease_ttl(struct iphdr *iph)
35*4882a593Smuzhiyun {
36*4882a593Smuzhiyun u32 check = (__force u32)iph->check;
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun check += (__force u32)htons(0x0100);
39*4882a593Smuzhiyun iph->check = (__force __sum16)(check + (check >= 0xFFFF));
40*4882a593Smuzhiyun return --iph->ttl;
41*4882a593Smuzhiyun }
42*4882a593Smuzhiyun
xdp_fwd_flags(struct xdp_md * ctx,u32 flags)43*4882a593Smuzhiyun static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
44*4882a593Smuzhiyun {
45*4882a593Smuzhiyun void *data_end = (void *)(long)ctx->data_end;
46*4882a593Smuzhiyun void *data = (void *)(long)ctx->data;
47*4882a593Smuzhiyun struct bpf_fib_lookup fib_params;
48*4882a593Smuzhiyun struct ethhdr *eth = data;
49*4882a593Smuzhiyun struct ipv6hdr *ip6h;
50*4882a593Smuzhiyun struct iphdr *iph;
51*4882a593Smuzhiyun u16 h_proto;
52*4882a593Smuzhiyun u64 nh_off;
53*4882a593Smuzhiyun int rc;
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun nh_off = sizeof(*eth);
56*4882a593Smuzhiyun if (data + nh_off > data_end)
57*4882a593Smuzhiyun return XDP_DROP;
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun __builtin_memset(&fib_params, 0, sizeof(fib_params));
60*4882a593Smuzhiyun
61*4882a593Smuzhiyun h_proto = eth->h_proto;
62*4882a593Smuzhiyun if (h_proto == htons(ETH_P_IP)) {
63*4882a593Smuzhiyun iph = data + nh_off;
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun if (iph + 1 > data_end)
66*4882a593Smuzhiyun return XDP_DROP;
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun if (iph->ttl <= 1)
69*4882a593Smuzhiyun return XDP_PASS;
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun fib_params.family = AF_INET;
72*4882a593Smuzhiyun fib_params.tos = iph->tos;
73*4882a593Smuzhiyun fib_params.l4_protocol = iph->protocol;
74*4882a593Smuzhiyun fib_params.sport = 0;
75*4882a593Smuzhiyun fib_params.dport = 0;
76*4882a593Smuzhiyun fib_params.tot_len = ntohs(iph->tot_len);
77*4882a593Smuzhiyun fib_params.ipv4_src = iph->saddr;
78*4882a593Smuzhiyun fib_params.ipv4_dst = iph->daddr;
79*4882a593Smuzhiyun } else if (h_proto == htons(ETH_P_IPV6)) {
80*4882a593Smuzhiyun struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
81*4882a593Smuzhiyun struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun ip6h = data + nh_off;
84*4882a593Smuzhiyun if (ip6h + 1 > data_end)
85*4882a593Smuzhiyun return XDP_DROP;
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun if (ip6h->hop_limit <= 1)
88*4882a593Smuzhiyun return XDP_PASS;
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun fib_params.family = AF_INET6;
91*4882a593Smuzhiyun fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
92*4882a593Smuzhiyun fib_params.l4_protocol = ip6h->nexthdr;
93*4882a593Smuzhiyun fib_params.sport = 0;
94*4882a593Smuzhiyun fib_params.dport = 0;
95*4882a593Smuzhiyun fib_params.tot_len = ntohs(ip6h->payload_len);
96*4882a593Smuzhiyun *src = ip6h->saddr;
97*4882a593Smuzhiyun *dst = ip6h->daddr;
98*4882a593Smuzhiyun } else {
99*4882a593Smuzhiyun return XDP_PASS;
100*4882a593Smuzhiyun }
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun fib_params.ifindex = ctx->ingress_ifindex;
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
105*4882a593Smuzhiyun /*
106*4882a593Smuzhiyun * Some rc (return codes) from bpf_fib_lookup() are important,
107*4882a593Smuzhiyun * to understand how this XDP-prog interacts with network stack.
108*4882a593Smuzhiyun *
109*4882a593Smuzhiyun * BPF_FIB_LKUP_RET_NO_NEIGH:
110*4882a593Smuzhiyun * Even if route lookup was a success, then the MAC-addresses are also
111*4882a593Smuzhiyun * needed. This is obtained from arp/neighbour table, but if table is
112*4882a593Smuzhiyun * (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned. To avoid
113*4882a593Smuzhiyun * doing ARP lookup directly from XDP, then send packet to normal
114*4882a593Smuzhiyun * network stack via XDP_PASS and expect it will do ARP resolution.
115*4882a593Smuzhiyun *
116*4882a593Smuzhiyun * BPF_FIB_LKUP_RET_FWD_DISABLED:
117*4882a593Smuzhiyun * The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
118*4882a593Smuzhiyun * setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
119*4882a593Smuzhiyun * enabled this on ingress device.
120*4882a593Smuzhiyun */
121*4882a593Smuzhiyun if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
122*4882a593Smuzhiyun /* Verify egress index has been configured as TX-port.
123*4882a593Smuzhiyun * (Note: User can still have inserted an egress ifindex that
124*4882a593Smuzhiyun * doesn't support XDP xmit, which will result in packet drops).
125*4882a593Smuzhiyun *
126*4882a593Smuzhiyun * Note: lookup in devmap supported since 0cdbb4b09a0.
127*4882a593Smuzhiyun * If not supported will fail with:
128*4882a593Smuzhiyun * cannot pass map_type 14 into func bpf_map_lookup_elem#1:
129*4882a593Smuzhiyun */
130*4882a593Smuzhiyun if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
131*4882a593Smuzhiyun return XDP_PASS;
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun if (h_proto == htons(ETH_P_IP))
134*4882a593Smuzhiyun ip_decrease_ttl(iph);
135*4882a593Smuzhiyun else if (h_proto == htons(ETH_P_IPV6))
136*4882a593Smuzhiyun ip6h->hop_limit--;
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
139*4882a593Smuzhiyun memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
140*4882a593Smuzhiyun return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
141*4882a593Smuzhiyun }
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun return XDP_PASS;
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun SEC("xdp_fwd")
xdp_fwd_prog(struct xdp_md * ctx)147*4882a593Smuzhiyun int xdp_fwd_prog(struct xdp_md *ctx)
148*4882a593Smuzhiyun {
149*4882a593Smuzhiyun return xdp_fwd_flags(ctx, 0);
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun SEC("xdp_fwd_direct")
xdp_fwd_direct_prog(struct xdp_md * ctx)153*4882a593Smuzhiyun int xdp_fwd_direct_prog(struct xdp_md *ctx)
154*4882a593Smuzhiyun {
155*4882a593Smuzhiyun return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun char _license[] SEC("license") = "GPL";
159