xref: /OK3568_Linux_fs/kernel/samples/bpf/sockex3_kern.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
	__uint(max_entries, 8);
} jmp_table SEC(".maps");

#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4

/* Protocol dispatch routine. It tail-calls next BPF program depending
 * on eth proto. Note, we could have used ...
 *
 *   bpf_tail_call(skb, &jmp_table, proto);
 *
 * ... but it would need large prog_array and cannot be optimised given
 * the map key is not static.
 */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
	switch (proto) {
	case ETH_P_8021Q:
	case ETH_P_8021AD:
		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
		break;
	case ETH_P_MPLS_UC:
	case ETH_P_MPLS_MC:
		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
		break;
	case ETH_P_IP:
		bpf_tail_call(skb, &jmp_table, PARSE_IP);
		break;
	case ETH_P_IPV6:
		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
		break;
	}
}
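
/* The tail calls above dispatch only after user space has filled the
 * corresponding jmp_table slots with program fds. A minimal sketch of
 * that loader side with libbpf (the object path and the per-slot lookup
 * shown here are illustrative assumptions, not part of this file):
 *
 *	struct bpf_object *obj = bpf_object__open_file("sockex3_kern.o", NULL);
 *	struct bpf_program *prog;
 *	int map_fd, prog_fd, key;
 *
 *	bpf_object__load(obj);
 *	map_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
 *
 *	prog = bpf_object__find_program_by_name(obj, "bpf_func_PARSE_IP");
 *	prog_fd = bpf_program__fd(prog);
 *	key = PARSE_IP;
 *	bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
 *	... repeat for PARSE_VLAN, PARSE_MPLS and PARSE_IPV6
 */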

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

struct flow_key_record {
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;
		__be16 port16[2];
	};
	__u32 ip_proto;
};

static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
		& (IP_MF | IP_OFFSET);
}

static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
	__u64 w0 = load_word(ctx, off);
	__u64 w1 = load_word(ctx, off + 4);
	__u64 w2 = load_word(ctx, off + 8);
	__u64 w3 = load_word(ctx, off + 12);

	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
}

struct globals {
	struct flow_key_record flow;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, struct globals);
	__uint(max_entries, 32);
} percpu_map SEC(".maps");

/* user poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
{
	u32 key = bpf_get_smp_processor_id();

	return bpf_map_lookup_elem(&percpu_map, &key);
}
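
/* Today the same thing could be done with a native per-CPU map; a sketch
 * of that alternative (not what this sample uses) would be:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 *		__type(key, __u32);
 *		__type(value, struct globals);
 *		__uint(max_entries, 1);
 *	} percpu_map SEC(".maps");
 *
 * with every lookup done at key 0: the kernel keeps one copy of the value
 * per CPU, so indexing by bpf_get_smp_processor_id() becomes unnecessary.
 */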

/* some simple stats for user space consumption */
struct pair {
	__u64 packets;
	__u64 bytes;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct flow_key_record);
	__type(value, struct pair);
	__uint(max_entries, 1024);
} hash_map SEC(".maps");

static void update_stats(struct __sk_buff *skb, struct globals *g)
{
	struct flow_key_record key = g->flow;
	struct pair *value;

	value = bpf_map_lookup_elem(&hash_map, &key);
	if (value) {
		__sync_fetch_and_add(&value->packets, 1);
		__sync_fetch_and_add(&value->bytes, skb->len);
	} else {
		struct pair val = {1, skb->len};

		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
	}
}

static __always_inline void parse_ip_proto(struct __sk_buff *skb,
					   struct globals *g, __u32 ip_proto)
{
	__u32 nhoff = skb->cb[0];
	int poff;

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		};

		__u32 gre_flags = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, flags));
		__u32 gre_proto = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, proto));

		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
			break;

		nhoff += 4;
		if (gre_flags & GRE_CSUM)
			nhoff += 4;
		if (gre_flags & GRE_KEY)
			nhoff += 4;
		if (gre_flags & GRE_SEQ)
			nhoff += 4;

		skb->cb[0] = nhoff;
		parse_eth_proto(skb, gre_proto);
		break;
	}
	case IPPROTO_IPIP:
		parse_eth_proto(skb, ETH_P_IP);
		break;
	case IPPROTO_IPV6:
		parse_eth_proto(skb, ETH_P_IPV6);
		break;
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		g->flow.ports = load_word(skb, nhoff);
		/* fall through: ICMP has no ports, but proto and stats are
		 * recorded for TCP, UDP and ICMP alike
		 */
	case IPPROTO_ICMP:
		g->flow.ip_proto = ip_proto;
		update_stats(skb, g);
		break;
	default:
		break;
	}
}

PROG(PARSE_IP)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, verlen, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	if (unlikely(ip_is_fragment(skb, nhoff)))
		return 0;

	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));

	if (ip_proto != IPPROTO_GRE) {
		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
	}

	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
	nhoff += (verlen & 0xF) << 2;

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_IPV6)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	ip_proto = load_byte(skb,
			     nhoff + offsetof(struct ipv6hdr, nexthdr));
	g->flow.src = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, saddr));
	g->flow.dst = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, daddr));
	nhoff += sizeof(struct ipv6hdr);

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_VLAN)(struct __sk_buff *skb)
{
	__u32 nhoff, proto;

	nhoff = skb->cb[0];

	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
						h_vlan_encapsulated_proto));
	nhoff += sizeof(struct vlan_hdr);
	skb->cb[0] = nhoff;

	parse_eth_proto(skb, proto);

	return 0;
}

PROG(PARSE_MPLS)(struct __sk_buff *skb)
{
	__u32 nhoff, label;

	nhoff = skb->cb[0];

	label = load_word(skb, nhoff);
	nhoff += sizeof(struct mpls_label);
	skb->cb[0] = nhoff;

	if (label & MPLS_LS_S_MASK) {
		/* bottom of stack: the IP version sits in the high nibble
		 * of the first byte of the encapsulated header
		 */
		__u8 verlen = load_byte(skb, nhoff);

		if ((verlen & 0xF0) == 0x40)
			parse_eth_proto(skb, ETH_P_IP);
		else
			parse_eth_proto(skb, ETH_P_IPV6);
	} else {
		parse_eth_proto(skb, ETH_P_MPLS_UC);
	}

	return 0;
}

SEC("socket/0")
int main_prog(struct __sk_buff *skb)
{
	__u32 nhoff = ETH_HLEN;
	__u32 proto = load_half(skb, 12);	/* EtherType at offset 12 of the Ethernet header */

	skb->cb[0] = nhoff;
	parse_eth_proto(skb, proto);
	return 0;
}
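
/* A minimal sketch of the assumed user-space side that attaches main_prog
 * to a packet socket (not part of this file; "prog" stands for the libbpf
 * handle the loader obtained for main_prog):
 *
 *	int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	int prog_fd = bpf_program__fd(prog);
 *
 *	setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));
 *
 * Every packet seen on the socket then runs main_prog, which seeds
 * skb->cb[0] with ETH_HLEN and tail-calls into the parsers above.
 */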

char _license[] SEC("license") = "GPL";