xref: /OK3568_Linux_fs/kernel/net/ipv6/seg6_iptunnel.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  SR-IPv6 implementation
 *
 *  Author:
 *  David Lebrun <david.lebrun@uclouvain.be>
 */
8*4882a593Smuzhiyun 
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/bottom_half.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
29*4882a593Smuzhiyun 
seg6_lwt_headroom(struct seg6_iptunnel_encap * tuninfo)30*4882a593Smuzhiyun static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
31*4882a593Smuzhiyun {
32*4882a593Smuzhiyun 	int head = 0;
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun 	switch (tuninfo->mode) {
35*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_INLINE:
36*4882a593Smuzhiyun 		break;
37*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_ENCAP:
38*4882a593Smuzhiyun 		head = sizeof(struct ipv6hdr);
39*4882a593Smuzhiyun 		break;
40*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_L2ENCAP:
41*4882a593Smuzhiyun 		return 0;
42*4882a593Smuzhiyun 	}
43*4882a593Smuzhiyun 
44*4882a593Smuzhiyun 	return ((tuninfo->srh->hdrlen + 1) << 3) + head;
45*4882a593Smuzhiyun }
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun struct seg6_lwt {
48*4882a593Smuzhiyun 	struct dst_cache cache;
49*4882a593Smuzhiyun 	struct seg6_iptunnel_encap tuninfo[];
50*4882a593Smuzhiyun };
51*4882a593Smuzhiyun 
seg6_lwt_lwtunnel(struct lwtunnel_state * lwt)52*4882a593Smuzhiyun static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
53*4882a593Smuzhiyun {
54*4882a593Smuzhiyun 	return (struct seg6_lwt *)lwt->data;
55*4882a593Smuzhiyun }
56*4882a593Smuzhiyun 
57*4882a593Smuzhiyun static inline struct seg6_iptunnel_encap *
seg6_encap_lwtunnel(struct lwtunnel_state * lwt)58*4882a593Smuzhiyun seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
59*4882a593Smuzhiyun {
60*4882a593Smuzhiyun 	return seg6_lwt_lwtunnel(lwt)->tuninfo;
61*4882a593Smuzhiyun }
62*4882a593Smuzhiyun 
63*4882a593Smuzhiyun static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
64*4882a593Smuzhiyun 	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
65*4882a593Smuzhiyun };
66*4882a593Smuzhiyun 
nla_put_srh(struct sk_buff * skb,int attrtype,struct seg6_iptunnel_encap * tuninfo)67*4882a593Smuzhiyun static int nla_put_srh(struct sk_buff *skb, int attrtype,
68*4882a593Smuzhiyun 		       struct seg6_iptunnel_encap *tuninfo)
69*4882a593Smuzhiyun {
70*4882a593Smuzhiyun 	struct seg6_iptunnel_encap *data;
71*4882a593Smuzhiyun 	struct nlattr *nla;
72*4882a593Smuzhiyun 	int len;
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun 	len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	nla = nla_reserve(skb, attrtype, len);
77*4882a593Smuzhiyun 	if (!nla)
78*4882a593Smuzhiyun 		return -EMSGSIZE;
79*4882a593Smuzhiyun 
80*4882a593Smuzhiyun 	data = nla_data(nla);
81*4882a593Smuzhiyun 	memcpy(data, tuninfo, len);
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun 	return 0;
84*4882a593Smuzhiyun }
85*4882a593Smuzhiyun 
set_tun_src(struct net * net,struct net_device * dev,struct in6_addr * daddr,struct in6_addr * saddr)86*4882a593Smuzhiyun static void set_tun_src(struct net *net, struct net_device *dev,
87*4882a593Smuzhiyun 			struct in6_addr *daddr, struct in6_addr *saddr)
88*4882a593Smuzhiyun {
89*4882a593Smuzhiyun 	struct seg6_pernet_data *sdata = seg6_pernet(net);
90*4882a593Smuzhiyun 	struct in6_addr *tun_src;
91*4882a593Smuzhiyun 
92*4882a593Smuzhiyun 	rcu_read_lock();
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun 	tun_src = rcu_dereference(sdata->tun_src);
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun 	if (!ipv6_addr_any(tun_src)) {
97*4882a593Smuzhiyun 		memcpy(saddr, tun_src, sizeof(struct in6_addr));
98*4882a593Smuzhiyun 	} else {
99*4882a593Smuzhiyun 		ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
100*4882a593Smuzhiyun 				   saddr);
101*4882a593Smuzhiyun 	}
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 	rcu_read_unlock();
104*4882a593Smuzhiyun }
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun /* Compute flowlabel for outer IPv6 header */
seg6_make_flowlabel(struct net * net,struct sk_buff * skb,struct ipv6hdr * inner_hdr)107*4882a593Smuzhiyun static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
108*4882a593Smuzhiyun 				  struct ipv6hdr *inner_hdr)
109*4882a593Smuzhiyun {
110*4882a593Smuzhiyun 	int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
111*4882a593Smuzhiyun 	__be32 flowlabel = 0;
112*4882a593Smuzhiyun 	u32 hash;
113*4882a593Smuzhiyun 
114*4882a593Smuzhiyun 	if (do_flowlabel > 0) {
115*4882a593Smuzhiyun 		hash = skb_get_hash(skb);
116*4882a593Smuzhiyun 		hash = rol32(hash, 16);
117*4882a593Smuzhiyun 		flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
118*4882a593Smuzhiyun 	} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
119*4882a593Smuzhiyun 		flowlabel = ip6_flowlabel(inner_hdr);
120*4882a593Smuzhiyun 	}
121*4882a593Smuzhiyun 	return flowlabel;
122*4882a593Smuzhiyun }
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
seg6_do_srh_encap(struct sk_buff * skb,struct ipv6_sr_hdr * osrh,int proto)125*4882a593Smuzhiyun int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
126*4882a593Smuzhiyun {
127*4882a593Smuzhiyun 	struct dst_entry *dst = skb_dst(skb);
128*4882a593Smuzhiyun 	struct net *net = dev_net(dst->dev);
129*4882a593Smuzhiyun 	struct ipv6hdr *hdr, *inner_hdr;
130*4882a593Smuzhiyun 	struct ipv6_sr_hdr *isrh;
131*4882a593Smuzhiyun 	int hdrlen, tot_len, err;
132*4882a593Smuzhiyun 	__be32 flowlabel;
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun 	hdrlen = (osrh->hdrlen + 1) << 3;
135*4882a593Smuzhiyun 	tot_len = hdrlen + sizeof(*hdr);
136*4882a593Smuzhiyun 
137*4882a593Smuzhiyun 	err = skb_cow_head(skb, tot_len + skb->mac_len);
138*4882a593Smuzhiyun 	if (unlikely(err))
139*4882a593Smuzhiyun 		return err;
140*4882a593Smuzhiyun 
141*4882a593Smuzhiyun 	inner_hdr = ipv6_hdr(skb);
142*4882a593Smuzhiyun 	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun 	skb_push(skb, tot_len);
145*4882a593Smuzhiyun 	skb_reset_network_header(skb);
146*4882a593Smuzhiyun 	skb_mac_header_rebuild(skb);
147*4882a593Smuzhiyun 	hdr = ipv6_hdr(skb);
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun 	/* inherit tc, flowlabel and hlim
150*4882a593Smuzhiyun 	 * hlim will be decremented in ip6_forward() afterwards and
151*4882a593Smuzhiyun 	 * decapsulation will overwrite inner hlim with outer hlim
152*4882a593Smuzhiyun 	 */
153*4882a593Smuzhiyun 
154*4882a593Smuzhiyun 	if (skb->protocol == htons(ETH_P_IPV6)) {
155*4882a593Smuzhiyun 		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
156*4882a593Smuzhiyun 			     flowlabel);
157*4882a593Smuzhiyun 		hdr->hop_limit = inner_hdr->hop_limit;
158*4882a593Smuzhiyun 	} else {
159*4882a593Smuzhiyun 		ip6_flow_hdr(hdr, 0, flowlabel);
160*4882a593Smuzhiyun 		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
161*4882a593Smuzhiyun 
162*4882a593Smuzhiyun 		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
163*4882a593Smuzhiyun 
164*4882a593Smuzhiyun 		/* the control block has been erased, so we have to set the
165*4882a593Smuzhiyun 		 * iif once again.
166*4882a593Smuzhiyun 		 * We read the receiving interface index directly from the
167*4882a593Smuzhiyun 		 * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
168*4882a593Smuzhiyun 		 * ip_rcv_core(...)).
169*4882a593Smuzhiyun 		 */
170*4882a593Smuzhiyun 		IP6CB(skb)->iif = skb->skb_iif;
171*4882a593Smuzhiyun 	}
172*4882a593Smuzhiyun 
173*4882a593Smuzhiyun 	hdr->nexthdr = NEXTHDR_ROUTING;
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 	isrh = (void *)hdr + sizeof(*hdr);
176*4882a593Smuzhiyun 	memcpy(isrh, osrh, hdrlen);
177*4882a593Smuzhiyun 
178*4882a593Smuzhiyun 	isrh->nexthdr = proto;
179*4882a593Smuzhiyun 
180*4882a593Smuzhiyun 	hdr->daddr = isrh->segments[isrh->first_segment];
181*4882a593Smuzhiyun 	set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
182*4882a593Smuzhiyun 
183*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SEG6_HMAC
184*4882a593Smuzhiyun 	if (sr_has_hmac(isrh)) {
185*4882a593Smuzhiyun 		err = seg6_push_hmac(net, &hdr->saddr, isrh);
186*4882a593Smuzhiyun 		if (unlikely(err))
187*4882a593Smuzhiyun 			return err;
188*4882a593Smuzhiyun 	}
189*4882a593Smuzhiyun #endif
190*4882a593Smuzhiyun 
191*4882a593Smuzhiyun 	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
192*4882a593Smuzhiyun 
193*4882a593Smuzhiyun 	skb_postpush_rcsum(skb, hdr, tot_len);
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	return 0;
196*4882a593Smuzhiyun }
197*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun /* insert an SRH within an IPv6 packet, just after the IPv6 header */
seg6_do_srh_inline(struct sk_buff * skb,struct ipv6_sr_hdr * osrh)200*4882a593Smuzhiyun int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun 	struct ipv6hdr *hdr, *oldhdr;
203*4882a593Smuzhiyun 	struct ipv6_sr_hdr *isrh;
204*4882a593Smuzhiyun 	int hdrlen, err;
205*4882a593Smuzhiyun 
206*4882a593Smuzhiyun 	hdrlen = (osrh->hdrlen + 1) << 3;
207*4882a593Smuzhiyun 
208*4882a593Smuzhiyun 	err = skb_cow_head(skb, hdrlen + skb->mac_len);
209*4882a593Smuzhiyun 	if (unlikely(err))
210*4882a593Smuzhiyun 		return err;
211*4882a593Smuzhiyun 
212*4882a593Smuzhiyun 	oldhdr = ipv6_hdr(skb);
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	skb_pull(skb, sizeof(struct ipv6hdr));
215*4882a593Smuzhiyun 	skb_postpull_rcsum(skb, skb_network_header(skb),
216*4882a593Smuzhiyun 			   sizeof(struct ipv6hdr));
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun 	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
219*4882a593Smuzhiyun 	skb_reset_network_header(skb);
220*4882a593Smuzhiyun 	skb_mac_header_rebuild(skb);
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 	hdr = ipv6_hdr(skb);
223*4882a593Smuzhiyun 
224*4882a593Smuzhiyun 	memmove(hdr, oldhdr, sizeof(*hdr));
225*4882a593Smuzhiyun 
226*4882a593Smuzhiyun 	isrh = (void *)hdr + sizeof(*hdr);
227*4882a593Smuzhiyun 	memcpy(isrh, osrh, hdrlen);
228*4882a593Smuzhiyun 
229*4882a593Smuzhiyun 	isrh->nexthdr = hdr->nexthdr;
230*4882a593Smuzhiyun 	hdr->nexthdr = NEXTHDR_ROUTING;
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun 	isrh->segments[0] = hdr->daddr;
233*4882a593Smuzhiyun 	hdr->daddr = isrh->segments[isrh->first_segment];
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SEG6_HMAC
236*4882a593Smuzhiyun 	if (sr_has_hmac(isrh)) {
237*4882a593Smuzhiyun 		struct net *net = dev_net(skb_dst(skb)->dev);
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun 		err = seg6_push_hmac(net, &hdr->saddr, isrh);
240*4882a593Smuzhiyun 		if (unlikely(err))
241*4882a593Smuzhiyun 			return err;
242*4882a593Smuzhiyun 	}
243*4882a593Smuzhiyun #endif
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun 	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
246*4882a593Smuzhiyun 
247*4882a593Smuzhiyun 	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
248*4882a593Smuzhiyun 
249*4882a593Smuzhiyun 	return 0;
250*4882a593Smuzhiyun }
251*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
252*4882a593Smuzhiyun 
seg6_do_srh(struct sk_buff * skb)253*4882a593Smuzhiyun static int seg6_do_srh(struct sk_buff *skb)
254*4882a593Smuzhiyun {
255*4882a593Smuzhiyun 	struct dst_entry *dst = skb_dst(skb);
256*4882a593Smuzhiyun 	struct seg6_iptunnel_encap *tinfo;
257*4882a593Smuzhiyun 	int proto, err = 0;
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun 	tinfo = seg6_encap_lwtunnel(dst->lwtstate);
260*4882a593Smuzhiyun 
261*4882a593Smuzhiyun 	switch (tinfo->mode) {
262*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_INLINE:
263*4882a593Smuzhiyun 		if (skb->protocol != htons(ETH_P_IPV6))
264*4882a593Smuzhiyun 			return -EINVAL;
265*4882a593Smuzhiyun 
266*4882a593Smuzhiyun 		err = seg6_do_srh_inline(skb, tinfo->srh);
267*4882a593Smuzhiyun 		if (err)
268*4882a593Smuzhiyun 			return err;
269*4882a593Smuzhiyun 		break;
270*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_ENCAP:
271*4882a593Smuzhiyun 		err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
272*4882a593Smuzhiyun 		if (err)
273*4882a593Smuzhiyun 			return err;
274*4882a593Smuzhiyun 
275*4882a593Smuzhiyun 		if (skb->protocol == htons(ETH_P_IPV6))
276*4882a593Smuzhiyun 			proto = IPPROTO_IPV6;
277*4882a593Smuzhiyun 		else if (skb->protocol == htons(ETH_P_IP))
278*4882a593Smuzhiyun 			proto = IPPROTO_IPIP;
279*4882a593Smuzhiyun 		else
280*4882a593Smuzhiyun 			return -EINVAL;
281*4882a593Smuzhiyun 
282*4882a593Smuzhiyun 		err = seg6_do_srh_encap(skb, tinfo->srh, proto);
283*4882a593Smuzhiyun 		if (err)
284*4882a593Smuzhiyun 			return err;
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun 		skb_set_inner_transport_header(skb, skb_transport_offset(skb));
287*4882a593Smuzhiyun 		skb_set_inner_protocol(skb, skb->protocol);
288*4882a593Smuzhiyun 		skb->protocol = htons(ETH_P_IPV6);
289*4882a593Smuzhiyun 		break;
290*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_L2ENCAP:
291*4882a593Smuzhiyun 		if (!skb_mac_header_was_set(skb))
292*4882a593Smuzhiyun 			return -EINVAL;
293*4882a593Smuzhiyun 
294*4882a593Smuzhiyun 		if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
295*4882a593Smuzhiyun 			return -ENOMEM;
296*4882a593Smuzhiyun 
297*4882a593Smuzhiyun 		skb_mac_header_rebuild(skb);
298*4882a593Smuzhiyun 		skb_push(skb, skb->mac_len);
299*4882a593Smuzhiyun 
300*4882a593Smuzhiyun 		err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
301*4882a593Smuzhiyun 		if (err)
302*4882a593Smuzhiyun 			return err;
303*4882a593Smuzhiyun 
304*4882a593Smuzhiyun 		skb->protocol = htons(ETH_P_IPV6);
305*4882a593Smuzhiyun 		break;
306*4882a593Smuzhiyun 	}
307*4882a593Smuzhiyun 
308*4882a593Smuzhiyun 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
309*4882a593Smuzhiyun 
310*4882a593Smuzhiyun 	return 0;
311*4882a593Smuzhiyun }
312*4882a593Smuzhiyun 
seg6_input(struct sk_buff * skb)313*4882a593Smuzhiyun static int seg6_input(struct sk_buff *skb)
314*4882a593Smuzhiyun {
315*4882a593Smuzhiyun 	struct dst_entry *orig_dst = skb_dst(skb);
316*4882a593Smuzhiyun 	struct dst_entry *dst = NULL;
317*4882a593Smuzhiyun 	struct seg6_lwt *slwt;
318*4882a593Smuzhiyun 	int err;
319*4882a593Smuzhiyun 
320*4882a593Smuzhiyun 	err = seg6_do_srh(skb);
321*4882a593Smuzhiyun 	if (unlikely(err)) {
322*4882a593Smuzhiyun 		kfree_skb(skb);
323*4882a593Smuzhiyun 		return err;
324*4882a593Smuzhiyun 	}
325*4882a593Smuzhiyun 
326*4882a593Smuzhiyun 	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
327*4882a593Smuzhiyun 
328*4882a593Smuzhiyun 	preempt_disable();
329*4882a593Smuzhiyun 	dst = dst_cache_get(&slwt->cache);
330*4882a593Smuzhiyun 	preempt_enable();
331*4882a593Smuzhiyun 
332*4882a593Smuzhiyun 	skb_dst_drop(skb);
333*4882a593Smuzhiyun 
334*4882a593Smuzhiyun 	if (!dst) {
335*4882a593Smuzhiyun 		ip6_route_input(skb);
336*4882a593Smuzhiyun 		dst = skb_dst(skb);
337*4882a593Smuzhiyun 		if (!dst->error) {
338*4882a593Smuzhiyun 			preempt_disable();
339*4882a593Smuzhiyun 			dst_cache_set_ip6(&slwt->cache, dst,
340*4882a593Smuzhiyun 					  &ipv6_hdr(skb)->saddr);
341*4882a593Smuzhiyun 			preempt_enable();
342*4882a593Smuzhiyun 		}
343*4882a593Smuzhiyun 	} else {
344*4882a593Smuzhiyun 		skb_dst_set(skb, dst);
345*4882a593Smuzhiyun 	}
346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
348*4882a593Smuzhiyun 	if (unlikely(err))
349*4882a593Smuzhiyun 		return err;
350*4882a593Smuzhiyun 
351*4882a593Smuzhiyun 	return dst_input(skb);
352*4882a593Smuzhiyun }
353*4882a593Smuzhiyun 
seg6_output(struct net * net,struct sock * sk,struct sk_buff * skb)354*4882a593Smuzhiyun static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
355*4882a593Smuzhiyun {
356*4882a593Smuzhiyun 	struct dst_entry *orig_dst = skb_dst(skb);
357*4882a593Smuzhiyun 	struct dst_entry *dst = NULL;
358*4882a593Smuzhiyun 	struct seg6_lwt *slwt;
359*4882a593Smuzhiyun 	int err = -EINVAL;
360*4882a593Smuzhiyun 
361*4882a593Smuzhiyun 	err = seg6_do_srh(skb);
362*4882a593Smuzhiyun 	if (unlikely(err))
363*4882a593Smuzhiyun 		goto drop;
364*4882a593Smuzhiyun 
365*4882a593Smuzhiyun 	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
366*4882a593Smuzhiyun 
367*4882a593Smuzhiyun 	preempt_disable();
368*4882a593Smuzhiyun 	dst = dst_cache_get(&slwt->cache);
369*4882a593Smuzhiyun 	preempt_enable();
370*4882a593Smuzhiyun 
371*4882a593Smuzhiyun 	if (unlikely(!dst)) {
372*4882a593Smuzhiyun 		struct ipv6hdr *hdr = ipv6_hdr(skb);
373*4882a593Smuzhiyun 		struct flowi6 fl6;
374*4882a593Smuzhiyun 
375*4882a593Smuzhiyun 		memset(&fl6, 0, sizeof(fl6));
376*4882a593Smuzhiyun 		fl6.daddr = hdr->daddr;
377*4882a593Smuzhiyun 		fl6.saddr = hdr->saddr;
378*4882a593Smuzhiyun 		fl6.flowlabel = ip6_flowinfo(hdr);
379*4882a593Smuzhiyun 		fl6.flowi6_mark = skb->mark;
380*4882a593Smuzhiyun 		fl6.flowi6_proto = hdr->nexthdr;
381*4882a593Smuzhiyun 
382*4882a593Smuzhiyun 		dst = ip6_route_output(net, NULL, &fl6);
383*4882a593Smuzhiyun 		if (dst->error) {
384*4882a593Smuzhiyun 			err = dst->error;
385*4882a593Smuzhiyun 			dst_release(dst);
386*4882a593Smuzhiyun 			goto drop;
387*4882a593Smuzhiyun 		}
388*4882a593Smuzhiyun 
389*4882a593Smuzhiyun 		preempt_disable();
390*4882a593Smuzhiyun 		dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
391*4882a593Smuzhiyun 		preempt_enable();
392*4882a593Smuzhiyun 	}
393*4882a593Smuzhiyun 
394*4882a593Smuzhiyun 	skb_dst_drop(skb);
395*4882a593Smuzhiyun 	skb_dst_set(skb, dst);
396*4882a593Smuzhiyun 
397*4882a593Smuzhiyun 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
398*4882a593Smuzhiyun 	if (unlikely(err))
399*4882a593Smuzhiyun 		goto drop;
400*4882a593Smuzhiyun 
401*4882a593Smuzhiyun 	return dst_output(net, sk, skb);
402*4882a593Smuzhiyun drop:
403*4882a593Smuzhiyun 	kfree_skb(skb);
404*4882a593Smuzhiyun 	return err;
405*4882a593Smuzhiyun }
406*4882a593Smuzhiyun 
seg6_build_state(struct net * net,struct nlattr * nla,unsigned int family,const void * cfg,struct lwtunnel_state ** ts,struct netlink_ext_ack * extack)407*4882a593Smuzhiyun static int seg6_build_state(struct net *net, struct nlattr *nla,
408*4882a593Smuzhiyun 			    unsigned int family, const void *cfg,
409*4882a593Smuzhiyun 			    struct lwtunnel_state **ts,
410*4882a593Smuzhiyun 			    struct netlink_ext_ack *extack)
411*4882a593Smuzhiyun {
412*4882a593Smuzhiyun 	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
413*4882a593Smuzhiyun 	struct seg6_iptunnel_encap *tuninfo;
414*4882a593Smuzhiyun 	struct lwtunnel_state *newts;
415*4882a593Smuzhiyun 	int tuninfo_len, min_size;
416*4882a593Smuzhiyun 	struct seg6_lwt *slwt;
417*4882a593Smuzhiyun 	int err;
418*4882a593Smuzhiyun 
419*4882a593Smuzhiyun 	if (family != AF_INET && family != AF_INET6)
420*4882a593Smuzhiyun 		return -EINVAL;
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun 	err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
423*4882a593Smuzhiyun 					  seg6_iptunnel_policy, extack);
424*4882a593Smuzhiyun 
425*4882a593Smuzhiyun 	if (err < 0)
426*4882a593Smuzhiyun 		return err;
427*4882a593Smuzhiyun 
428*4882a593Smuzhiyun 	if (!tb[SEG6_IPTUNNEL_SRH])
429*4882a593Smuzhiyun 		return -EINVAL;
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun 	tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
432*4882a593Smuzhiyun 	tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
433*4882a593Smuzhiyun 
434*4882a593Smuzhiyun 	/* tuninfo must contain at least the iptunnel encap structure,
435*4882a593Smuzhiyun 	 * the SRH and one segment
436*4882a593Smuzhiyun 	 */
437*4882a593Smuzhiyun 	min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
438*4882a593Smuzhiyun 		   sizeof(struct in6_addr);
439*4882a593Smuzhiyun 	if (tuninfo_len < min_size)
440*4882a593Smuzhiyun 		return -EINVAL;
441*4882a593Smuzhiyun 
442*4882a593Smuzhiyun 	switch (tuninfo->mode) {
443*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_INLINE:
444*4882a593Smuzhiyun 		if (family != AF_INET6)
445*4882a593Smuzhiyun 			return -EINVAL;
446*4882a593Smuzhiyun 
447*4882a593Smuzhiyun 		break;
448*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_ENCAP:
449*4882a593Smuzhiyun 		break;
450*4882a593Smuzhiyun 	case SEG6_IPTUN_MODE_L2ENCAP:
451*4882a593Smuzhiyun 		break;
452*4882a593Smuzhiyun 	default:
453*4882a593Smuzhiyun 		return -EINVAL;
454*4882a593Smuzhiyun 	}
455*4882a593Smuzhiyun 
456*4882a593Smuzhiyun 	/* verify that SRH is consistent */
457*4882a593Smuzhiyun 	if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
458*4882a593Smuzhiyun 		return -EINVAL;
459*4882a593Smuzhiyun 
460*4882a593Smuzhiyun 	newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
461*4882a593Smuzhiyun 	if (!newts)
462*4882a593Smuzhiyun 		return -ENOMEM;
463*4882a593Smuzhiyun 
464*4882a593Smuzhiyun 	slwt = seg6_lwt_lwtunnel(newts);
465*4882a593Smuzhiyun 
466*4882a593Smuzhiyun 	err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
467*4882a593Smuzhiyun 	if (err) {
468*4882a593Smuzhiyun 		kfree(newts);
469*4882a593Smuzhiyun 		return err;
470*4882a593Smuzhiyun 	}
471*4882a593Smuzhiyun 
472*4882a593Smuzhiyun 	memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
473*4882a593Smuzhiyun 
474*4882a593Smuzhiyun 	newts->type = LWTUNNEL_ENCAP_SEG6;
475*4882a593Smuzhiyun 	newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
476*4882a593Smuzhiyun 
477*4882a593Smuzhiyun 	if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
478*4882a593Smuzhiyun 		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
479*4882a593Smuzhiyun 
480*4882a593Smuzhiyun 	newts->headroom = seg6_lwt_headroom(tuninfo);
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun 	*ts = newts;
483*4882a593Smuzhiyun 
484*4882a593Smuzhiyun 	return 0;
485*4882a593Smuzhiyun }
486*4882a593Smuzhiyun 
seg6_destroy_state(struct lwtunnel_state * lwt)487*4882a593Smuzhiyun static void seg6_destroy_state(struct lwtunnel_state *lwt)
488*4882a593Smuzhiyun {
489*4882a593Smuzhiyun 	dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
490*4882a593Smuzhiyun }
491*4882a593Smuzhiyun 
seg6_fill_encap_info(struct sk_buff * skb,struct lwtunnel_state * lwtstate)492*4882a593Smuzhiyun static int seg6_fill_encap_info(struct sk_buff *skb,
493*4882a593Smuzhiyun 				struct lwtunnel_state *lwtstate)
494*4882a593Smuzhiyun {
495*4882a593Smuzhiyun 	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
496*4882a593Smuzhiyun 
497*4882a593Smuzhiyun 	if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
498*4882a593Smuzhiyun 		return -EMSGSIZE;
499*4882a593Smuzhiyun 
500*4882a593Smuzhiyun 	return 0;
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun 
/* Netlink space needed to dump the encap descriptor of @lwtstate. */
static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *encap = seg6_encap_lwtunnel(lwtstate);
	int payload_len = SEG6_IPTUN_ENCAP_SIZE(encap);

	return nla_total_size(payload_len);
}
509*4882a593Smuzhiyun 
/* Compare the encap descriptors of two lwtunnel states.
 *
 * Returns 0 when both have the same size and identical bytes, non-zero
 * otherwise.
 */
static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *lhs = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *rhs = seg6_encap_lwtunnel(b);
	int lhs_len = SEG6_IPTUN_ENCAP_SIZE(lhs);
	int rhs_len = SEG6_IPTUN_ENCAP_SIZE(rhs);

	if (lhs_len == rhs_len)
		return memcmp(lhs, rhs, lhs_len);

	return 1;
}
521*4882a593Smuzhiyun 
522*4882a593Smuzhiyun static const struct lwtunnel_encap_ops seg6_iptun_ops = {
523*4882a593Smuzhiyun 	.build_state = seg6_build_state,
524*4882a593Smuzhiyun 	.destroy_state = seg6_destroy_state,
525*4882a593Smuzhiyun 	.output = seg6_output,
526*4882a593Smuzhiyun 	.input = seg6_input,
527*4882a593Smuzhiyun 	.fill_encap = seg6_fill_encap_info,
528*4882a593Smuzhiyun 	.get_encap_size = seg6_encap_nlsize,
529*4882a593Smuzhiyun 	.cmp_encap = seg6_encap_cmp,
530*4882a593Smuzhiyun 	.owner = THIS_MODULE,
531*4882a593Smuzhiyun };
532*4882a593Smuzhiyun 
seg6_iptunnel_init(void)533*4882a593Smuzhiyun int __init seg6_iptunnel_init(void)
534*4882a593Smuzhiyun {
535*4882a593Smuzhiyun 	return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
536*4882a593Smuzhiyun }
537*4882a593Smuzhiyun 
seg6_iptunnel_exit(void)538*4882a593Smuzhiyun void seg6_iptunnel_exit(void)
539*4882a593Smuzhiyun {
540*4882a593Smuzhiyun 	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
541*4882a593Smuzhiyun }
542