// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  SR-IPv6 implementation
 *
 *  Author:
 *  David Lebrun <david.lebrun@uclouvain.be>
 */
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun #include <linux/types.h>
10*4882a593Smuzhiyun #include <linux/skbuff.h>
11*4882a593Smuzhiyun #include <linux/net.h>
12*4882a593Smuzhiyun #include <linux/module.h>
13*4882a593Smuzhiyun #include <net/ip.h>
14*4882a593Smuzhiyun #include <net/ip_tunnels.h>
15*4882a593Smuzhiyun #include <net/lwtunnel.h>
16*4882a593Smuzhiyun #include <net/netevent.h>
17*4882a593Smuzhiyun #include <net/netns/generic.h>
18*4882a593Smuzhiyun #include <net/ip6_fib.h>
19*4882a593Smuzhiyun #include <net/route.h>
20*4882a593Smuzhiyun #include <net/seg6.h>
21*4882a593Smuzhiyun #include <linux/seg6.h>
22*4882a593Smuzhiyun #include <linux/seg6_iptunnel.h>
23*4882a593Smuzhiyun #include <net/addrconf.h>
24*4882a593Smuzhiyun #include <net/ip6_route.h>
25*4882a593Smuzhiyun #include <net/dst_cache.h>
26*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SEG6_HMAC
27*4882a593Smuzhiyun #include <net/seg6_hmac.h>
28*4882a593Smuzhiyun #endif
29*4882a593Smuzhiyun
seg6_lwt_headroom(struct seg6_iptunnel_encap * tuninfo)30*4882a593Smuzhiyun static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
31*4882a593Smuzhiyun {
32*4882a593Smuzhiyun int head = 0;
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun switch (tuninfo->mode) {
35*4882a593Smuzhiyun case SEG6_IPTUN_MODE_INLINE:
36*4882a593Smuzhiyun break;
37*4882a593Smuzhiyun case SEG6_IPTUN_MODE_ENCAP:
38*4882a593Smuzhiyun head = sizeof(struct ipv6hdr);
39*4882a593Smuzhiyun break;
40*4882a593Smuzhiyun case SEG6_IPTUN_MODE_L2ENCAP:
41*4882a593Smuzhiyun return 0;
42*4882a593Smuzhiyun }
43*4882a593Smuzhiyun
44*4882a593Smuzhiyun return ((tuninfo->srh->hdrlen + 1) << 3) + head;
45*4882a593Smuzhiyun }
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun struct seg6_lwt {
48*4882a593Smuzhiyun struct dst_cache cache;
49*4882a593Smuzhiyun struct seg6_iptunnel_encap tuninfo[];
50*4882a593Smuzhiyun };
51*4882a593Smuzhiyun
seg6_lwt_lwtunnel(struct lwtunnel_state * lwt)52*4882a593Smuzhiyun static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
53*4882a593Smuzhiyun {
54*4882a593Smuzhiyun return (struct seg6_lwt *)lwt->data;
55*4882a593Smuzhiyun }
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun static inline struct seg6_iptunnel_encap *
seg6_encap_lwtunnel(struct lwtunnel_state * lwt)58*4882a593Smuzhiyun seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
59*4882a593Smuzhiyun {
60*4882a593Smuzhiyun return seg6_lwt_lwtunnel(lwt)->tuninfo;
61*4882a593Smuzhiyun }
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
64*4882a593Smuzhiyun [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY },
65*4882a593Smuzhiyun };
66*4882a593Smuzhiyun
nla_put_srh(struct sk_buff * skb,int attrtype,struct seg6_iptunnel_encap * tuninfo)67*4882a593Smuzhiyun static int nla_put_srh(struct sk_buff *skb, int attrtype,
68*4882a593Smuzhiyun struct seg6_iptunnel_encap *tuninfo)
69*4882a593Smuzhiyun {
70*4882a593Smuzhiyun struct seg6_iptunnel_encap *data;
71*4882a593Smuzhiyun struct nlattr *nla;
72*4882a593Smuzhiyun int len;
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
75*4882a593Smuzhiyun
76*4882a593Smuzhiyun nla = nla_reserve(skb, attrtype, len);
77*4882a593Smuzhiyun if (!nla)
78*4882a593Smuzhiyun return -EMSGSIZE;
79*4882a593Smuzhiyun
80*4882a593Smuzhiyun data = nla_data(nla);
81*4882a593Smuzhiyun memcpy(data, tuninfo, len);
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun return 0;
84*4882a593Smuzhiyun }
85*4882a593Smuzhiyun
set_tun_src(struct net * net,struct net_device * dev,struct in6_addr * daddr,struct in6_addr * saddr)86*4882a593Smuzhiyun static void set_tun_src(struct net *net, struct net_device *dev,
87*4882a593Smuzhiyun struct in6_addr *daddr, struct in6_addr *saddr)
88*4882a593Smuzhiyun {
89*4882a593Smuzhiyun struct seg6_pernet_data *sdata = seg6_pernet(net);
90*4882a593Smuzhiyun struct in6_addr *tun_src;
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun rcu_read_lock();
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun tun_src = rcu_dereference(sdata->tun_src);
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun if (!ipv6_addr_any(tun_src)) {
97*4882a593Smuzhiyun memcpy(saddr, tun_src, sizeof(struct in6_addr));
98*4882a593Smuzhiyun } else {
99*4882a593Smuzhiyun ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
100*4882a593Smuzhiyun saddr);
101*4882a593Smuzhiyun }
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun rcu_read_unlock();
104*4882a593Smuzhiyun }
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun /* Compute flowlabel for outer IPv6 header */
seg6_make_flowlabel(struct net * net,struct sk_buff * skb,struct ipv6hdr * inner_hdr)107*4882a593Smuzhiyun static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
108*4882a593Smuzhiyun struct ipv6hdr *inner_hdr)
109*4882a593Smuzhiyun {
110*4882a593Smuzhiyun int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
111*4882a593Smuzhiyun __be32 flowlabel = 0;
112*4882a593Smuzhiyun u32 hash;
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun if (do_flowlabel > 0) {
115*4882a593Smuzhiyun hash = skb_get_hash(skb);
116*4882a593Smuzhiyun hash = rol32(hash, 16);
117*4882a593Smuzhiyun flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
118*4882a593Smuzhiyun } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
119*4882a593Smuzhiyun flowlabel = ip6_flowlabel(inner_hdr);
120*4882a593Smuzhiyun }
121*4882a593Smuzhiyun return flowlabel;
122*4882a593Smuzhiyun }
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
seg6_do_srh_encap(struct sk_buff * skb,struct ipv6_sr_hdr * osrh,int proto)125*4882a593Smuzhiyun int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
126*4882a593Smuzhiyun {
127*4882a593Smuzhiyun struct dst_entry *dst = skb_dst(skb);
128*4882a593Smuzhiyun struct net *net = dev_net(dst->dev);
129*4882a593Smuzhiyun struct ipv6hdr *hdr, *inner_hdr;
130*4882a593Smuzhiyun struct ipv6_sr_hdr *isrh;
131*4882a593Smuzhiyun int hdrlen, tot_len, err;
132*4882a593Smuzhiyun __be32 flowlabel;
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun hdrlen = (osrh->hdrlen + 1) << 3;
135*4882a593Smuzhiyun tot_len = hdrlen + sizeof(*hdr);
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun err = skb_cow_head(skb, tot_len + skb->mac_len);
138*4882a593Smuzhiyun if (unlikely(err))
139*4882a593Smuzhiyun return err;
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun inner_hdr = ipv6_hdr(skb);
142*4882a593Smuzhiyun flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun skb_push(skb, tot_len);
145*4882a593Smuzhiyun skb_reset_network_header(skb);
146*4882a593Smuzhiyun skb_mac_header_rebuild(skb);
147*4882a593Smuzhiyun hdr = ipv6_hdr(skb);
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun /* inherit tc, flowlabel and hlim
150*4882a593Smuzhiyun * hlim will be decremented in ip6_forward() afterwards and
151*4882a593Smuzhiyun * decapsulation will overwrite inner hlim with outer hlim
152*4882a593Smuzhiyun */
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun if (skb->protocol == htons(ETH_P_IPV6)) {
155*4882a593Smuzhiyun ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
156*4882a593Smuzhiyun flowlabel);
157*4882a593Smuzhiyun hdr->hop_limit = inner_hdr->hop_limit;
158*4882a593Smuzhiyun } else {
159*4882a593Smuzhiyun ip6_flow_hdr(hdr, 0, flowlabel);
160*4882a593Smuzhiyun hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun /* the control block has been erased, so we have to set the
165*4882a593Smuzhiyun * iif once again.
166*4882a593Smuzhiyun * We read the receiving interface index directly from the
167*4882a593Smuzhiyun * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
168*4882a593Smuzhiyun * ip_rcv_core(...)).
169*4882a593Smuzhiyun */
170*4882a593Smuzhiyun IP6CB(skb)->iif = skb->skb_iif;
171*4882a593Smuzhiyun }
172*4882a593Smuzhiyun
173*4882a593Smuzhiyun hdr->nexthdr = NEXTHDR_ROUTING;
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun isrh = (void *)hdr + sizeof(*hdr);
176*4882a593Smuzhiyun memcpy(isrh, osrh, hdrlen);
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun isrh->nexthdr = proto;
179*4882a593Smuzhiyun
180*4882a593Smuzhiyun hdr->daddr = isrh->segments[isrh->first_segment];
181*4882a593Smuzhiyun set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
182*4882a593Smuzhiyun
183*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SEG6_HMAC
184*4882a593Smuzhiyun if (sr_has_hmac(isrh)) {
185*4882a593Smuzhiyun err = seg6_push_hmac(net, &hdr->saddr, isrh);
186*4882a593Smuzhiyun if (unlikely(err))
187*4882a593Smuzhiyun return err;
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun #endif
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
192*4882a593Smuzhiyun
193*4882a593Smuzhiyun skb_postpush_rcsum(skb, hdr, tot_len);
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun return 0;
196*4882a593Smuzhiyun }
197*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun /* insert an SRH within an IPv6 packet, just after the IPv6 header */
seg6_do_srh_inline(struct sk_buff * skb,struct ipv6_sr_hdr * osrh)200*4882a593Smuzhiyun int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun struct ipv6hdr *hdr, *oldhdr;
203*4882a593Smuzhiyun struct ipv6_sr_hdr *isrh;
204*4882a593Smuzhiyun int hdrlen, err;
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun hdrlen = (osrh->hdrlen + 1) << 3;
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun err = skb_cow_head(skb, hdrlen + skb->mac_len);
209*4882a593Smuzhiyun if (unlikely(err))
210*4882a593Smuzhiyun return err;
211*4882a593Smuzhiyun
212*4882a593Smuzhiyun oldhdr = ipv6_hdr(skb);
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun skb_pull(skb, sizeof(struct ipv6hdr));
215*4882a593Smuzhiyun skb_postpull_rcsum(skb, skb_network_header(skb),
216*4882a593Smuzhiyun sizeof(struct ipv6hdr));
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
219*4882a593Smuzhiyun skb_reset_network_header(skb);
220*4882a593Smuzhiyun skb_mac_header_rebuild(skb);
221*4882a593Smuzhiyun
222*4882a593Smuzhiyun hdr = ipv6_hdr(skb);
223*4882a593Smuzhiyun
224*4882a593Smuzhiyun memmove(hdr, oldhdr, sizeof(*hdr));
225*4882a593Smuzhiyun
226*4882a593Smuzhiyun isrh = (void *)hdr + sizeof(*hdr);
227*4882a593Smuzhiyun memcpy(isrh, osrh, hdrlen);
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun isrh->nexthdr = hdr->nexthdr;
230*4882a593Smuzhiyun hdr->nexthdr = NEXTHDR_ROUTING;
231*4882a593Smuzhiyun
232*4882a593Smuzhiyun isrh->segments[0] = hdr->daddr;
233*4882a593Smuzhiyun hdr->daddr = isrh->segments[isrh->first_segment];
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SEG6_HMAC
236*4882a593Smuzhiyun if (sr_has_hmac(isrh)) {
237*4882a593Smuzhiyun struct net *net = dev_net(skb_dst(skb)->dev);
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun err = seg6_push_hmac(net, &hdr->saddr, isrh);
240*4882a593Smuzhiyun if (unlikely(err))
241*4882a593Smuzhiyun return err;
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun #endif
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
246*4882a593Smuzhiyun
247*4882a593Smuzhiyun skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun return 0;
250*4882a593Smuzhiyun }
251*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
252*4882a593Smuzhiyun
seg6_do_srh(struct sk_buff * skb)253*4882a593Smuzhiyun static int seg6_do_srh(struct sk_buff *skb)
254*4882a593Smuzhiyun {
255*4882a593Smuzhiyun struct dst_entry *dst = skb_dst(skb);
256*4882a593Smuzhiyun struct seg6_iptunnel_encap *tinfo;
257*4882a593Smuzhiyun int proto, err = 0;
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun tinfo = seg6_encap_lwtunnel(dst->lwtstate);
260*4882a593Smuzhiyun
261*4882a593Smuzhiyun switch (tinfo->mode) {
262*4882a593Smuzhiyun case SEG6_IPTUN_MODE_INLINE:
263*4882a593Smuzhiyun if (skb->protocol != htons(ETH_P_IPV6))
264*4882a593Smuzhiyun return -EINVAL;
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun err = seg6_do_srh_inline(skb, tinfo->srh);
267*4882a593Smuzhiyun if (err)
268*4882a593Smuzhiyun return err;
269*4882a593Smuzhiyun break;
270*4882a593Smuzhiyun case SEG6_IPTUN_MODE_ENCAP:
271*4882a593Smuzhiyun err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
272*4882a593Smuzhiyun if (err)
273*4882a593Smuzhiyun return err;
274*4882a593Smuzhiyun
275*4882a593Smuzhiyun if (skb->protocol == htons(ETH_P_IPV6))
276*4882a593Smuzhiyun proto = IPPROTO_IPV6;
277*4882a593Smuzhiyun else if (skb->protocol == htons(ETH_P_IP))
278*4882a593Smuzhiyun proto = IPPROTO_IPIP;
279*4882a593Smuzhiyun else
280*4882a593Smuzhiyun return -EINVAL;
281*4882a593Smuzhiyun
282*4882a593Smuzhiyun err = seg6_do_srh_encap(skb, tinfo->srh, proto);
283*4882a593Smuzhiyun if (err)
284*4882a593Smuzhiyun return err;
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun skb_set_inner_transport_header(skb, skb_transport_offset(skb));
287*4882a593Smuzhiyun skb_set_inner_protocol(skb, skb->protocol);
288*4882a593Smuzhiyun skb->protocol = htons(ETH_P_IPV6);
289*4882a593Smuzhiyun break;
290*4882a593Smuzhiyun case SEG6_IPTUN_MODE_L2ENCAP:
291*4882a593Smuzhiyun if (!skb_mac_header_was_set(skb))
292*4882a593Smuzhiyun return -EINVAL;
293*4882a593Smuzhiyun
294*4882a593Smuzhiyun if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
295*4882a593Smuzhiyun return -ENOMEM;
296*4882a593Smuzhiyun
297*4882a593Smuzhiyun skb_mac_header_rebuild(skb);
298*4882a593Smuzhiyun skb_push(skb, skb->mac_len);
299*4882a593Smuzhiyun
300*4882a593Smuzhiyun err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
301*4882a593Smuzhiyun if (err)
302*4882a593Smuzhiyun return err;
303*4882a593Smuzhiyun
304*4882a593Smuzhiyun skb->protocol = htons(ETH_P_IPV6);
305*4882a593Smuzhiyun break;
306*4882a593Smuzhiyun }
307*4882a593Smuzhiyun
308*4882a593Smuzhiyun skb_set_transport_header(skb, sizeof(struct ipv6hdr));
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun return 0;
311*4882a593Smuzhiyun }
312*4882a593Smuzhiyun
seg6_input(struct sk_buff * skb)313*4882a593Smuzhiyun static int seg6_input(struct sk_buff *skb)
314*4882a593Smuzhiyun {
315*4882a593Smuzhiyun struct dst_entry *orig_dst = skb_dst(skb);
316*4882a593Smuzhiyun struct dst_entry *dst = NULL;
317*4882a593Smuzhiyun struct seg6_lwt *slwt;
318*4882a593Smuzhiyun int err;
319*4882a593Smuzhiyun
320*4882a593Smuzhiyun err = seg6_do_srh(skb);
321*4882a593Smuzhiyun if (unlikely(err)) {
322*4882a593Smuzhiyun kfree_skb(skb);
323*4882a593Smuzhiyun return err;
324*4882a593Smuzhiyun }
325*4882a593Smuzhiyun
326*4882a593Smuzhiyun slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun preempt_disable();
329*4882a593Smuzhiyun dst = dst_cache_get(&slwt->cache);
330*4882a593Smuzhiyun preempt_enable();
331*4882a593Smuzhiyun
332*4882a593Smuzhiyun skb_dst_drop(skb);
333*4882a593Smuzhiyun
334*4882a593Smuzhiyun if (!dst) {
335*4882a593Smuzhiyun ip6_route_input(skb);
336*4882a593Smuzhiyun dst = skb_dst(skb);
337*4882a593Smuzhiyun if (!dst->error) {
338*4882a593Smuzhiyun preempt_disable();
339*4882a593Smuzhiyun dst_cache_set_ip6(&slwt->cache, dst,
340*4882a593Smuzhiyun &ipv6_hdr(skb)->saddr);
341*4882a593Smuzhiyun preempt_enable();
342*4882a593Smuzhiyun }
343*4882a593Smuzhiyun } else {
344*4882a593Smuzhiyun skb_dst_set(skb, dst);
345*4882a593Smuzhiyun }
346*4882a593Smuzhiyun
347*4882a593Smuzhiyun err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
348*4882a593Smuzhiyun if (unlikely(err))
349*4882a593Smuzhiyun return err;
350*4882a593Smuzhiyun
351*4882a593Smuzhiyun return dst_input(skb);
352*4882a593Smuzhiyun }
353*4882a593Smuzhiyun
seg6_output(struct net * net,struct sock * sk,struct sk_buff * skb)354*4882a593Smuzhiyun static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
355*4882a593Smuzhiyun {
356*4882a593Smuzhiyun struct dst_entry *orig_dst = skb_dst(skb);
357*4882a593Smuzhiyun struct dst_entry *dst = NULL;
358*4882a593Smuzhiyun struct seg6_lwt *slwt;
359*4882a593Smuzhiyun int err = -EINVAL;
360*4882a593Smuzhiyun
361*4882a593Smuzhiyun err = seg6_do_srh(skb);
362*4882a593Smuzhiyun if (unlikely(err))
363*4882a593Smuzhiyun goto drop;
364*4882a593Smuzhiyun
365*4882a593Smuzhiyun slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
366*4882a593Smuzhiyun
367*4882a593Smuzhiyun preempt_disable();
368*4882a593Smuzhiyun dst = dst_cache_get(&slwt->cache);
369*4882a593Smuzhiyun preempt_enable();
370*4882a593Smuzhiyun
371*4882a593Smuzhiyun if (unlikely(!dst)) {
372*4882a593Smuzhiyun struct ipv6hdr *hdr = ipv6_hdr(skb);
373*4882a593Smuzhiyun struct flowi6 fl6;
374*4882a593Smuzhiyun
375*4882a593Smuzhiyun memset(&fl6, 0, sizeof(fl6));
376*4882a593Smuzhiyun fl6.daddr = hdr->daddr;
377*4882a593Smuzhiyun fl6.saddr = hdr->saddr;
378*4882a593Smuzhiyun fl6.flowlabel = ip6_flowinfo(hdr);
379*4882a593Smuzhiyun fl6.flowi6_mark = skb->mark;
380*4882a593Smuzhiyun fl6.flowi6_proto = hdr->nexthdr;
381*4882a593Smuzhiyun
382*4882a593Smuzhiyun dst = ip6_route_output(net, NULL, &fl6);
383*4882a593Smuzhiyun if (dst->error) {
384*4882a593Smuzhiyun err = dst->error;
385*4882a593Smuzhiyun dst_release(dst);
386*4882a593Smuzhiyun goto drop;
387*4882a593Smuzhiyun }
388*4882a593Smuzhiyun
389*4882a593Smuzhiyun preempt_disable();
390*4882a593Smuzhiyun dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
391*4882a593Smuzhiyun preempt_enable();
392*4882a593Smuzhiyun }
393*4882a593Smuzhiyun
394*4882a593Smuzhiyun skb_dst_drop(skb);
395*4882a593Smuzhiyun skb_dst_set(skb, dst);
396*4882a593Smuzhiyun
397*4882a593Smuzhiyun err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
398*4882a593Smuzhiyun if (unlikely(err))
399*4882a593Smuzhiyun goto drop;
400*4882a593Smuzhiyun
401*4882a593Smuzhiyun return dst_output(net, sk, skb);
402*4882a593Smuzhiyun drop:
403*4882a593Smuzhiyun kfree_skb(skb);
404*4882a593Smuzhiyun return err;
405*4882a593Smuzhiyun }
406*4882a593Smuzhiyun
seg6_build_state(struct net * net,struct nlattr * nla,unsigned int family,const void * cfg,struct lwtunnel_state ** ts,struct netlink_ext_ack * extack)407*4882a593Smuzhiyun static int seg6_build_state(struct net *net, struct nlattr *nla,
408*4882a593Smuzhiyun unsigned int family, const void *cfg,
409*4882a593Smuzhiyun struct lwtunnel_state **ts,
410*4882a593Smuzhiyun struct netlink_ext_ack *extack)
411*4882a593Smuzhiyun {
412*4882a593Smuzhiyun struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
413*4882a593Smuzhiyun struct seg6_iptunnel_encap *tuninfo;
414*4882a593Smuzhiyun struct lwtunnel_state *newts;
415*4882a593Smuzhiyun int tuninfo_len, min_size;
416*4882a593Smuzhiyun struct seg6_lwt *slwt;
417*4882a593Smuzhiyun int err;
418*4882a593Smuzhiyun
419*4882a593Smuzhiyun if (family != AF_INET && family != AF_INET6)
420*4882a593Smuzhiyun return -EINVAL;
421*4882a593Smuzhiyun
422*4882a593Smuzhiyun err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
423*4882a593Smuzhiyun seg6_iptunnel_policy, extack);
424*4882a593Smuzhiyun
425*4882a593Smuzhiyun if (err < 0)
426*4882a593Smuzhiyun return err;
427*4882a593Smuzhiyun
428*4882a593Smuzhiyun if (!tb[SEG6_IPTUNNEL_SRH])
429*4882a593Smuzhiyun return -EINVAL;
430*4882a593Smuzhiyun
431*4882a593Smuzhiyun tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
432*4882a593Smuzhiyun tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
433*4882a593Smuzhiyun
434*4882a593Smuzhiyun /* tuninfo must contain at least the iptunnel encap structure,
435*4882a593Smuzhiyun * the SRH and one segment
436*4882a593Smuzhiyun */
437*4882a593Smuzhiyun min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
438*4882a593Smuzhiyun sizeof(struct in6_addr);
439*4882a593Smuzhiyun if (tuninfo_len < min_size)
440*4882a593Smuzhiyun return -EINVAL;
441*4882a593Smuzhiyun
442*4882a593Smuzhiyun switch (tuninfo->mode) {
443*4882a593Smuzhiyun case SEG6_IPTUN_MODE_INLINE:
444*4882a593Smuzhiyun if (family != AF_INET6)
445*4882a593Smuzhiyun return -EINVAL;
446*4882a593Smuzhiyun
447*4882a593Smuzhiyun break;
448*4882a593Smuzhiyun case SEG6_IPTUN_MODE_ENCAP:
449*4882a593Smuzhiyun break;
450*4882a593Smuzhiyun case SEG6_IPTUN_MODE_L2ENCAP:
451*4882a593Smuzhiyun break;
452*4882a593Smuzhiyun default:
453*4882a593Smuzhiyun return -EINVAL;
454*4882a593Smuzhiyun }
455*4882a593Smuzhiyun
456*4882a593Smuzhiyun /* verify that SRH is consistent */
457*4882a593Smuzhiyun if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
458*4882a593Smuzhiyun return -EINVAL;
459*4882a593Smuzhiyun
460*4882a593Smuzhiyun newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
461*4882a593Smuzhiyun if (!newts)
462*4882a593Smuzhiyun return -ENOMEM;
463*4882a593Smuzhiyun
464*4882a593Smuzhiyun slwt = seg6_lwt_lwtunnel(newts);
465*4882a593Smuzhiyun
466*4882a593Smuzhiyun err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
467*4882a593Smuzhiyun if (err) {
468*4882a593Smuzhiyun kfree(newts);
469*4882a593Smuzhiyun return err;
470*4882a593Smuzhiyun }
471*4882a593Smuzhiyun
472*4882a593Smuzhiyun memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
473*4882a593Smuzhiyun
474*4882a593Smuzhiyun newts->type = LWTUNNEL_ENCAP_SEG6;
475*4882a593Smuzhiyun newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
476*4882a593Smuzhiyun
477*4882a593Smuzhiyun if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
478*4882a593Smuzhiyun newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
479*4882a593Smuzhiyun
480*4882a593Smuzhiyun newts->headroom = seg6_lwt_headroom(tuninfo);
481*4882a593Smuzhiyun
482*4882a593Smuzhiyun *ts = newts;
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun return 0;
485*4882a593Smuzhiyun }
486*4882a593Smuzhiyun
seg6_destroy_state(struct lwtunnel_state * lwt)487*4882a593Smuzhiyun static void seg6_destroy_state(struct lwtunnel_state *lwt)
488*4882a593Smuzhiyun {
489*4882a593Smuzhiyun dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
490*4882a593Smuzhiyun }
491*4882a593Smuzhiyun
seg6_fill_encap_info(struct sk_buff * skb,struct lwtunnel_state * lwtstate)492*4882a593Smuzhiyun static int seg6_fill_encap_info(struct sk_buff *skb,
493*4882a593Smuzhiyun struct lwtunnel_state *lwtstate)
494*4882a593Smuzhiyun {
495*4882a593Smuzhiyun struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
496*4882a593Smuzhiyun
497*4882a593Smuzhiyun if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
498*4882a593Smuzhiyun return -EMSGSIZE;
499*4882a593Smuzhiyun
500*4882a593Smuzhiyun return 0;
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun
/* Return the netlink attribute size needed to dump @lwtstate's encap info. */
static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
	int payload = SEG6_IPTUN_ENCAP_SIZE(tuninfo);

	return nla_total_size(payload);
}
509*4882a593Smuzhiyun
/*
 * Compare the encap configurations of two seg6 lwtunnel states.
 *
 * Returns 0 when both have the same size and identical bytes, non-zero
 * otherwise.
 */
static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *enc_a = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *enc_b = seg6_encap_lwtunnel(b);
	int len_a = SEG6_IPTUN_ENCAP_SIZE(enc_a);
	int len_b = SEG6_IPTUN_ENCAP_SIZE(enc_b);

	/* configurations of different sizes can never match */
	if (len_a != len_b)
		return 1;

	return memcmp(enc_a, enc_b, len_a);
}
521*4882a593Smuzhiyun
522*4882a593Smuzhiyun static const struct lwtunnel_encap_ops seg6_iptun_ops = {
523*4882a593Smuzhiyun .build_state = seg6_build_state,
524*4882a593Smuzhiyun .destroy_state = seg6_destroy_state,
525*4882a593Smuzhiyun .output = seg6_output,
526*4882a593Smuzhiyun .input = seg6_input,
527*4882a593Smuzhiyun .fill_encap = seg6_fill_encap_info,
528*4882a593Smuzhiyun .get_encap_size = seg6_encap_nlsize,
529*4882a593Smuzhiyun .cmp_encap = seg6_encap_cmp,
530*4882a593Smuzhiyun .owner = THIS_MODULE,
531*4882a593Smuzhiyun };
532*4882a593Smuzhiyun
seg6_iptunnel_init(void)533*4882a593Smuzhiyun int __init seg6_iptunnel_init(void)
534*4882a593Smuzhiyun {
535*4882a593Smuzhiyun return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
536*4882a593Smuzhiyun }
537*4882a593Smuzhiyun
seg6_iptunnel_exit(void)538*4882a593Smuzhiyun void seg6_iptunnel_exit(void)
539*4882a593Smuzhiyun {
540*4882a593Smuzhiyun lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
541*4882a593Smuzhiyun }
542